# load necessary packages and data
library(tidyr)
library(dplyr)
library(readr)
library(plotly)
library(ggplot2)
library(jgcricolors)
library(Polychrome)

setwd("C:/Users/spei632/Documents/GCAM_industry/food_processing")
fig_dir <- "C:/Users/spei632/Documents/GCAM_industry/food_processing/initial_exploration/figures_energy_calorie_coef_calc"
if(!dir.exists(fig_dir)) {
  dir.create(fig_dir)
}

# load data
# energy balances data from IEA, intermediate processed version from gcamdata
IEA_en_bal <- read_csv("initial_exploration/data/L100.IEA_en_bal_ctry_hist.csv")
# FAO population and GDP data
fao_pop <- read_csv("initial_exploration/data/FAOSTAT_data_pop_12-29-2022.csv")
fao_gdp <- read_csv("initial_exploration/data/FAOSTAT_data_gdp_12-29-2022.csv")
# FAO food data
macronutrientrate_2010_2019_mean <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean.csv", skip = 8)
fao_sua_2010_2019 <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019.csv", skip = 8)
fao_fbsh_1973_2009 <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009.csv", skip = 8)
# gcamdata calorie consumption data
L101.CropMeat_Food_Pcal_R_C_Y <- read_csv("initial_exploration/data/L101.CropMeat_Food_Pcal_R_C_Y.csv")
# gcamdata GDP data
L102.pcgdp_thous90USD_Scen_R_Y <- read_csv("initial_exploration/data/L102.pcgdp_thous90USD_Scen_R_Y.csv")
L102.gdp_mil90usd_Scen_R_Y <- read_csv("initial_exploration/data/L102.gdp_mil90usd_Scen_R_Y.csv")
# resulting data from implemented infilling
L1328.EJ_R_food_F_Yh_est_to_infill_adders <- read_csv("initial_exploration/data/L1328.EJ_R_food_F_Yh_est_to_infill_adders_5-19-23.csv")
L1328.in_EJ_R_food_F_Yh_recal <- read_csv("initial_exploration/data/L1328.in_EJ_R_food_F_Yh_recal_5-19-23.csv")
L1328.in_EJ_R_indenergy_F_Yh_tmp_2 <- read_csv("initial_exploration/data/L1328.in_EJ_R_indenergy_F_Yh_tmp_2_5-19-23.csv")
L1328.out_Pcal_R_food_Yh <- read_csv("initial_exploration/data/L1328.out_Pcal_R_food_Yh_5-19-23.csv")

# set constants
CONV_KTOE_EJ <- 0.00004186
hist_years <- c(1971:2015)
gcam_hist_years <- c(1990, 2005, 2010, 2015)
conv_P_k <- 10^12

# mappings
IEA_product_fuel <- read_csv("mappings/IEA_product_fuel.csv", skip = 7)
enduse_fuel_aggregation <- read_csv("mappings/enduse_fuel_aggregation.csv", skip = 5)
IEA_ctry <- read_csv("mappings/iea_ctry.csv", skip = 5)
iso_GCAM_regID <- read_csv("mappings/iso_GCAM_regID.csv", skip = 6)
GCAM_region_names <- read_csv("mappings/GCAM_region_names.csv", skip = 6)
AGLU_ctry <- read_csv("mappings/AGLU_ctry.csv", skip = 7)
Mapping_SUA_PrimaryEquivalent <- read_csv("initial_exploration/data/Mapping_SUA_PrimaryEquivalent.csv", skip = 21)
Mapping_item_FBS_GCAM <- read_csv("initial_exploration/data/Mapping_item_FBS_GCAM.csv", skip = 7)
SUA_item_code_map <- read_csv("initial_exploration/data/SUA_item_code_map.csv", skip = 9)
Area_Region_Map <- read_csv("initial_exploration/data/Area_Region_Map.csv")
GCAM_commodity_type_mapping <- read_csv("mappings/GCAM_commodity_type_mapping.csv")

# colors and palettes
data(palette36)
palette36 <- unname(palette36)
pal_sel <- jgcricol()$pal_all

# assumptions/criteria for filtering data
min_frac_foodpro <- 0.01
min_year <- 1990
max_frac_inonspec <- 0.5
override_frac_foodpro <- 0.1
max_pval <- 0.05

Processing GDP data

Calculate GDP per capita from the GDP and population data from the FAO.

# get from the FAO data (also include population)
fao_gdp_per_cap_hist <- fao_gdp %>%
  filter(Element == "Value US$ per capita, 2015 prices" & Year <= 2015) %>%
  left_join(fao_pop %>% mutate(pop = Value * 1000) %>% dplyr::select(Area, Year, pop))

# prep FAO data
fao_gdp_per_cap_hist_relabel <- fao_gdp_per_cap_hist %>%
  # exclude USSR values in 1990, because these are covered by individual regions
  filter(!(Area == "USSR" & Year == 1990)) %>%
  # also exclude the "China" GDP since that includes Hong Kong and Macao as well, which are also broken out; there is another one that is "China, mainland" which we keep
  filter(Area != "China") %>%
  mutate(GDP_pcap_thous2015USD_FAO = Value / 1000) %>%
  dplyr::select(area = Area, year = Year, GDP_pcap_thous2015USD_FAO, pop) %>%
  # edit a few areas and join to isos and GCAM region IDs
  mutate(area = case_when(area == "Côte d'Ivoire" ~ "Cote dIvoire",
                          area == "Curaçao" ~ "Curacao",
                          area == "Democratic People's Republic of Korea" ~ "Democratic Peoples Republic of Korea",
                          area == "Lao People's Democratic Republic" ~ "Lao Peoples Democratic Republic",
                          area == "Sint Maarten (Dutch part)" ~ "Sint Maarten (Dutch Part)",
                          area == "Türkiye" ~ "Turkiye",
                          grepl("Yemen", area) ~ "Yemen",
                          TRUE ~ area)) %>%
  group_by(area, year) %>%
  summarize(GDP_pcap_thous2015USD_FAO = weighted.mean(GDP_pcap_thous2015USD_FAO, pop),
            pop = sum(pop)) %>%
  ungroup() %>%
  # get total GDP
  mutate(GDP_mil2015USD_FAO = GDP_pcap_thous2015USD_FAO * pop / 1000) %>%
  left_join(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
  left_join(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso")

GDP_per_cap_hist_calc_FAO <- fao_gdp_per_cap_hist_relabel %>%
  rename(area_FAO = area)

Select just the SSP2 data from the gcamdata GDP data.

GDP_pcap_gcamdata_sel <- L102.pcgdp_thous90USD_Scen_R_Y %>% filter(scenario == "gSSP2") %>%
  rename(GDP_pcap_thous90USD = value)

GDP_gcamdata_sel <- L102.gdp_mil90usd_Scen_R_Y %>% filter(scenario == "gSSP2") %>%
  rename(GDP_mil90USD = value)

Processing the IEA data

First get IEA data for industry (non-specified and specific industries).

# industry flows
flows_sel <- c("MINING", "CONSTRUC", "IRONSTL", "CHEMICAL", "NONFERR", "NONMET", "TRANSEQ", "MACHINE", "FOODPRO", "PAPERPRO", "WOODPRO", "TEXTILES", "INONSPEC")

# get industry data and map correctly
IEA_ind_sectors_region_fuel <- IEA_en_bal %>%
  filter(FLOW %in% flows_sel) %>%
  dplyr::select(iso, FLOW, PRODUCT, as.character(hist_years)) %>%
  left_join(select(iso_GCAM_regID, iso, GCAM_region_ID, country_name), by = "iso") %>%
  left_join(select(IEA_product_fuel, fuel, PRODUCT = product), by = "PRODUCT") %>%
  left_join(enduse_fuel_aggregation %>% dplyr::select(fuel, industry)) %>%
  # remap biomass_tradbio to biomass
  mutate(industry = if_else(fuel == "biomass_tradbio", "biomass", industry)) %>%
  pivot_longer(as.character(hist_years), names_to = "year", values_to = "value") %>%
  rename(fuel_orig = fuel, fuel = industry) %>%
  filter(!is.na(fuel)) %>%
  group_by(iso, country_name, GCAM_region_ID, year, fuel, FLOW) %>%
  # summarize and convert to EJ
  summarize(value = sum(value, na.rm = TRUE) * CONV_KTOE_EJ) %>%
  ungroup() %>%
  mutate(year = as.numeric(year)) %>%
  # get fraction of each sector to the total fuel use
  group_by(iso, country_name, GCAM_region_ID, year, fuel) %>%
  mutate(frac_sector = value / sum(value)) %>%
  ungroup()

# complete nesting so that there are values for all sectors, fuels, and years, even if 0
all_IEA_years <- unique(IEA_ind_sectors_region_fuel$year)
all_IEA_fuels <- unique(IEA_ind_sectors_region_fuel$fuel)
IEA_ind_sectors_region_fuel <- IEA_ind_sectors_region_fuel %>%
  complete(nesting(iso, country_name, GCAM_region_ID),
           year = all_IEA_years,
           FLOW = flows_sel,
           fuel = all_IEA_fuels) %>%
  mutate(value = replace_na(value, 0),
         frac_sector = replace_na(frac_sector, 0))

# aggregate to GCAM region level
IEA_ind_sectors_GCAM_region_fuel <- IEA_ind_sectors_region_fuel %>%
  group_by(GCAM_region_ID, year, fuel, FLOW) %>%
  summarize(value = sum(value)) %>%
  ungroup() %>%
  full_join(GCAM_region_names) %>%
  group_by(GCAM_region_ID, year, fuel) %>%
  mutate(frac_sector = value / sum(value)) %>%
  ungroup()

# get the regions and fuels with all their energy in INONSPEC in all years
country_fuels_all_inonspec <- IEA_ind_sectors_region_fuel %>%
  filter(FLOW == "INONSPEC") %>%
  group_by(iso, country_name, GCAM_region_ID, fuel) %>%
  filter(all(frac_sector == 1)) %>%
  dplyr::select(iso, country_name, GCAM_region_ID, fuel) %>%
  distinct()

# aggregate to total energy 
IEA_ind_sectors_region_total_en <- IEA_ind_sectors_region_fuel %>%
  group_by(iso, country_name, GCAM_region_ID, year, FLOW) %>%
  summarize(value = sum(value)) %>%
  group_by(iso, country_name, GCAM_region_ID, year) %>%
  mutate(frac = value / sum(value)) %>%
  ungroup()

# aggregate to GCAM region
IEA_ind_sectors_GCAM_region_total_en <- IEA_ind_sectors_region_total_en %>%
  group_by(GCAM_region_ID, year, FLOW) %>%
  summarize(value = sum(value)) %>%
  group_by(GCAM_region_ID, year) %>%
  mutate(frac = value / sum(value)) %>%
  ungroup()

Processing food production data

Processing the raw FAO data

Get food consumption from the FAO data. This is modified from the zchunk_LA100.FAO_SUA_PrimaryEquivalent.R processing code to process into GCAM commodities and calories, but does not do some infilling of data (using the global average values for macronutrient rates for regions missing those data) and does not exclude NEC (not otherwise classified) calorie data from the total food calories.

# modified from zchunk_LA100.FAO_SUA_PrimaryEquivalent.R - processing into GCAM commodities and calories, but not doing some infilling of data with global averages that is done in the chunk
# 2010-2019 data first
Mapping_SUA_PrimaryEquivalent %>%
  select(GCAM_commodity, item = source_item) %>%
  bind_rows(Mapping_SUA_PrimaryEquivalent %>%
              select(GCAM_commodity, item = sink_item)) %>%
  distinct() %>% arrange(GCAM_commodity) ->
  SUA_Items_GCAM

SUA_item_code_map %>%
      filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM
 
SUA_item_code_map %>%
  filter(item_code %in% unique(macronutrientrate_2010_2019_mean$item_code)) %>%
  left_join(SUA_Items_GCAM, by = "item") %>%
  # For NA GCAM_commodity: not elsewhere classified (NEC)
  # So we would know % of food calories not included in GCAM commodities
  mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) ->
  SUA_Items_Food

# get world average macronutrient rate
macronutrientrate_2010_2019_mean %>% tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>% 
  group_by(item, item_code, macronutrient) %>%
  summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>%
  ungroup() ->
  SUA_food_macronutrient_rate_World

# calculate SUA food Calories consumption by joining macronutrient rates and SUA food - adapted from zchunk_LA100.FAO_SUA_PrimaryEquivalent.R, not using world average for regions with missing data
fao_sua_2010_2019 %>%
  filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>%
  # ensure we at least have a complete series across time otherwise it may
  # throw off moving avg calculations
  complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>%
  rename(Food_Kt = value) %>%
  select(-element) %>%
  gcamdata::left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>%
  gcamdata::repeat_add_columns(
    tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>%
  left_join(macronutrientrate_2010_2019_mean %>%
          tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc),
        by = c("area_code", "item_code", "item", "macronutrient")) %>%
  # gcamdata::left_join_error_no_match(SUA_food_macronutrient_rate_World,
  #                              by = c("item_code", "item", "macronutrient")) %>%
  # mutate(macronutrient_value = if_else(is.na(macronutrient_value),
  #                                          macronutrient_value_World,
  #                                          macronutrient_value)) %>%
  # calculate total Cal, protein and fat in food
  # value was in 1000 ton or 10^ 9 g
  mutate(value = macronutrient_value * Food_Kt,
         value = if_else(macronutrient %in% c("fatperc", "proteinperc"),
                             value / 100 /1000, value)) %>% # unit from perc to Mt
  #select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>%
  select(-macronutrient_value) %>%
  # rename element with units
  mutate(macronutrient = case_when(
        macronutrient == "calperg" ~ "MKcal",
        macronutrient == "fatperc" ~ "MtFat",
        macronutrient == "proteinperc" ~ "MtProtein" )) %>%
  gcamdata::left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") ->
  FAO_Food_Macronutrient_All_2010_2019

# calculate macronutrient rates from the aggregated values - aggregated to GCAM commodities
macronutrientrate_agg_GCAM_commod_2010_2019 <- FAO_Food_Macronutrient_All_2010_2019 %>%
  pivot_wider(names_from = macronutrient, values_from = value) %>%
  # in this calculation, we only want to include food that has an associated calorie content (to not have too high of a denominator)
  mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, Food_Kt)) %>%
  group_by(area_code, area, year, GCAM_commodity, iso, GCAM_region_ID) %>%
  summarize(Food_Kt = sum(Food_Kt_w_calorie),
            MKcal = sum(MKcal, na.rm = TRUE),
            MtFat = sum(MtFat, na.rm = TRUE),
            MtProtein = sum(MtProtein, na.rm = TRUE)) %>%
  mutate(calperg = MKcal / Food_Kt,
         fatperc = MtFat / Food_Kt * 100 * 1000,
         proteinperc = MtProtein / Food_Kt * 100 * 1000)
  
# aggregate to mean of all years
macronutrientrate_agg_GCAM_commod_2010_2019_mean <- macronutrientrate_agg_GCAM_commod_2010_2019 %>%
  group_by(area_code, area, GCAM_commodity, iso, GCAM_region_ID) %>%
  summarize(Food_Kt_sum = sum(Food_Kt),
            MKcal_sum = sum(MKcal),
            MtFat_sum = sum(MtFat),
            MtProtein_sum = sum(MtProtein),
            calperg_mean = mean(calperg, na.rm = TRUE),
            fatperc = mean(fatperc, na.rm = TRUE), 
            proteinperc = mean(proteinperc, na.rm = TRUE),
            calperg_mean_weighted = MKcal_sum / Food_Kt_sum) %>%
  ungroup()

# aggregate to total calories
FAO_Food_Macronutrient_All_2010_2019_total <- FAO_Food_Macronutrient_All_2010_2019 %>%
  # get Food_Kt both including and not including food that has no associated calorie content
  mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
  group_by(area, year, iso, GCAM_region_ID, macronutrient) %>%
  summarize(macronutrient_value = sum(value, na.rm = TRUE), 
            Food_Kt = sum(Food_Kt),
            Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
  ungroup() %>%
  mutate(year = as.numeric(year))

# aggregate to staples, meat, and other non-staples
FAO_Food_Macronutrient_All_2010_2019_total_cats <- FAO_Food_Macronutrient_All_2010_2019 %>%
  left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
  # get Food_Kt both including and not including food that has no associated calorie content
  mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
  group_by(area, year, iso, GCAM_region_ID, commodity_type_broad, macronutrient) %>%
  summarize(macronutrient_value = sum(value, na.rm = TRUE), 
            Food_Kt = sum(Food_Kt),
            Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
  ungroup() %>%
  mutate(year = as.numeric(year))

# aggregate by GCAM commodity
FAO_Food_Macronutrient_All_2010_2019_commodity <- FAO_Food_Macronutrient_All_2010_2019 %>%
  # get Food_Kt both including and not including food that has no associated calorie content
  mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
  group_by(area, year, iso, GCAM_region_ID, GCAM_commodity, macronutrient) %>%
  summarize(macronutrient_value = sum(value, na.rm = TRUE), 
            Food_Kt = sum(Food_Kt),
            Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
  ungroup() %>%
  mutate(year = as.numeric(year))

# data before 2010
fao_fbsh_1973_2009 %>%
  gcamdata::gather_years() %>%
  filter(year < min(fao_sua_2010_2019$year)) %>%
  filter(!is.na(value)) ->
  FBSH_CB

Mapping_item_FBS_GCAM %>%
  select(item_code, GCAM_commodity)%>%
  filter(!is.na(GCAM_commodity)) %>%
  left_join(FBSH_CB %>%
              # complete element
              complete(nesting(area, area_code, item_code, item, year), element,
                           fill = list(value = 0)),
                by = "item_code") %>%
  dplyr::group_by_at(vars(-value, -item, -item_code)) %>%
  summarise(value = sum(value), .groups = "drop") %>%
  gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
  gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
  gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
  #dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
  dplyr::group_by_at(vars(area, region, GCAM_region_ID, year, GCAM_commodity, element, iso, unit)) %>%
  summarise(value = sum(value), .groups = "drop") ->
  FBSH_CB_reg

# get just food
FBSH_CB_reg_food <- FBSH_CB_reg %>%
  filter(element == "Food" & !is.na(unit)) %>%
  mutate(unit = "Kt")

# convert to calories using the (weighted) mean of the conversion rate from the 2010-2019 data
FBSH_CB_reg_food_cal <- FBSH_CB_reg_food %>%
  left_join(macronutrientrate_agg_GCAM_commod_2010_2019_mean %>% 
              dplyr::select(area, iso, GCAM_region_ID, GCAM_commodity, calperg = calperg_mean_weighted),
            by = c("area", "GCAM_region_ID", "iso", "GCAM_commodity")) %>%
  mutate(MKcal = value * calperg)

# aggregate to total calories
FBSH_CB_reg_food_cal_total <- FBSH_CB_reg_food_cal %>%
  # get Food_Kt both including and not including food that has no associated calorie content
  mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
  group_by(area, year, iso, GCAM_region_ID, region) %>%
  summarize(MKcal = sum(MKcal, na.rm = TRUE), 
            Food_Kt = sum(value),
            Food_Kt_w_calorie = sum(Food_Kt_w_calorie)) %>%
  ungroup()

# aggregate to staples, meat, and other non-staples
FBSH_CB_reg_food_cal_total_cats <- FBSH_CB_reg_food_cal %>%
  left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
  # get Food_Kt both including and not including food that has no associated calorie content
  mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
  group_by(area, year, iso, GCAM_region_ID, region, commodity_type_broad) %>%
  summarize(MKcal = sum(MKcal, na.rm = TRUE), 
            Food_Kt = sum(value),
            Food_Kt_w_calorie = sum(Food_Kt_w_calorie)) %>%
  ungroup()

# combine the pre-2010 and 2010-2019 data
total_cal_food_kt_all_years <- FAO_Food_Macronutrient_All_2010_2019_total %>%
  pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
  dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal) %>%
  rbind(FBSH_CB_reg_food_cal_total %>%
          dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt, Food_Kt_w_calorie)) %>%
  mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
         calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
  arrange(area, year)

total_cal_food_kt_all_years_cats <- FAO_Food_Macronutrient_All_2010_2019_total_cats %>%
  pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
  dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal, commodity_type_broad) %>%
  rbind(FBSH_CB_reg_food_cal_total_cats %>%
          dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt, Food_Kt_w_calorie, commodity_type_broad)) %>%
  mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
         calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
  arrange(area, year, commodity_type_broad)

# combine the GCAM-commodity level data
total_cal_food_kt_all_years_commodity <- FAO_Food_Macronutrient_All_2010_2019_commodity %>%
  pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
  dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal, GCAM_commodity) %>%
  rbind(FBSH_CB_reg_food_cal %>%
          # get Food_Kt both including and not including food that has no associated calorie content
          mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
          dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt = value, Food_Kt_w_calorie, GCAM_commodity)) %>%
  mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
         calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
  arrange(area, year, GCAM_commodity)

# also obtain animal feed in production units
fao_sua_2010_2019 %>%
  filter(element == "Feed", item_code %in% SUA_Items_Food$item_code) %>%
  rename(Feed_Kt = value) %>%
  select(-element) %>%
  gcamdata::left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>%
  gcamdata::left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") ->
  FAO_Feed_Production_All_2010_2019

# aggregate to total feed production by region
total_feed_kt_2010_2019 <- FAO_Feed_Production_All_2010_2019 %>%
  group_by(area_code, area, year, iso, GCAM_region_ID) %>%
  summarize(Feed_Kt = sum(Feed_Kt)) %>%
  mutate(year = as.numeric(year)) %>%
  ungroup()

# get pre-2010 data for feed
FBSH_CB_reg_feed <- FBSH_CB_reg %>%
  filter(element == "Feed" & !is.na(unit)) %>%
  mutate(unit = "Kt")

total_feed_kt_till_2010 <- FBSH_CB_reg_feed %>%
  group_by(area, year, iso, GCAM_region_ID) %>%
  summarize(Feed_Kt = sum(value)) %>%
  ungroup()

# join
total_feed_kt_all_years <- total_feed_kt_2010_2019 %>%
  dplyr::select(area, year, iso, GCAM_region_ID, Feed_Kt) %>%
  rbind(total_feed_kt_till_2010 %>%
          dplyr::select(area, year, iso, GCAM_region_ID, Feed_Kt)) %>%
  arrange(area, year)

Processing the gcamdata food consumption data

Now using the gcamdata calorie consumption data, L101.CropMeat_Food_Pcal_R_C_Y. These data use the global average values for macronutrient rate to infill for regions that are missing those data, as well as exclude NEC (not otherwise classified) calorie data from the total food calories. Aggregate these data to total calories and calories of staples and nonstaples.

# aggregate into total calories
total_cal_food_gcamdata_GCAM_reg <- L101.CropMeat_Food_Pcal_R_C_Y %>%
  group_by(GCAM_region_ID, year) %>%
  summarize(PKcal = sum(value) / 1000) %>%
  ungroup()

# also get calories from staples and nonstaples
staples_ns_cal_food_gcamdata_GCAM_reg <- L101.CropMeat_Food_Pcal_R_C_Y %>%
  left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
  mutate(commodity_type_broad = ifelse(grepl("nonstaples", commodity_type_broad), "nonstaples", commodity_type_broad)) %>%
  group_by(GCAM_region_ID, year, commodity_type_broad) %>%
  summarize(PKcal = sum(value) / 1000) %>%
  ungroup() %>%
  pivot_wider(names_from = commodity_type_broad, values_from = PKcal)

Plot some background figures

Plotting energy use by each of the industry sectors in the IEA data, as well as specifically food processing and other non-specified industry energy. These figures are saved but not printed below.

# plot energy use by industry sectors
fig <- ggplot(IEA_ind_sectors_region_total_en,
       aes(x = year, y = value, color = FLOW)) +
  geom_line() +
  scale_color_manual(values = c("gray20", "red", "firebrick4", "deepskyblue1", "dodgerblue3", 
                                "olivedrab", "darkgreen", "palegreen", "peachpuff2", "pink", "mediumpurple", 
                                "gold1", "orange"), name = "sector") + 
  facet_wrap(~country_name, scale = "free") + 
  labs(x = "", y = "EJ", title = "Industry energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_by_sector_country_hist_IEA.png"), fig, width = 35, height = 20)

# plot fractions in food processing and non-specified industry
fig <- ggplot(IEA_ind_sectors_region_total_en %>% filter(FLOW %in% c("INONSPEC", "FOODPRO")),
       aes(x = year, y = frac, color = FLOW)) +
  geom_line() +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "slategray") + 
  scale_color_manual(values = c("firebrick4", "deepskyblue1"), name = "sector") + 
  facet_wrap(~country_name, scale = "free_x") + 
  labs(x = "", y = "Fraction", title = "Fraction of total industry energy use in non-specified industry and food processing (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_frac_inonspec_foodpro_country_hist_IEA.png"), fig, width = 35, height = 20)

# replot at a GCAM region level
fig <- ggplot(IEA_ind_sectors_GCAM_region_total_en %>%
         left_join(GCAM_region_names),
       aes(x = year, y = value, color = FLOW)) +
  geom_line() +
  scale_color_manual(values = c("gray20", "red", "firebrick4", "deepskyblue1", "dodgerblue3", 
                                "olivedrab", "darkgreen", "palegreen", "peachpuff2", "pink", "mediumpurple", 
                                "gold1", "orange"), name = "sector") + 
  facet_wrap(~region, scale = "free") + 
  labs(x = "", y = "EJ", title = "Industry energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_by_sector_GCAM_reg_hist_IEA.png"), fig, width = 35, height = 20)

fig <- ggplot(IEA_ind_sectors_GCAM_region_total_en %>% filter(FLOW %in% c("INONSPEC", "FOODPRO")) %>%
         left_join(GCAM_region_names),
       aes(x = year, y = frac, color = FLOW)) +
  geom_line() +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "slategray") + 
  scale_color_manual(values = c("firebrick4", "deepskyblue1"), name = "sector") + 
  facet_wrap(~region, scale = "free_x") + 
  labs(x = "", y = "Fraction", title = "Fraction of total industry energy use in non-specified industry and food processing (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_frac_inonspec_foodpro_GCAM_reg_hist_IEA.png"), fig, width = 35, height = 20)

# plot the share of industry energy in food processing for just the top 10 regions with the largest shares in 2015
sel_data <- IEA_ind_sectors_GCAM_region_total_en %>% 
  filter(FLOW == "FOODPRO" & year == 2015) %>%
  left_join(GCAM_region_names) %>%
  arrange(desc(frac)) %>%
  head(10) %>%
  mutate(percent = frac * 100)
sel_data <- sel_data %>% mutate(region = factor(region, levels = unique(sel_data$region)))
fig <- ggplot(sel_data,
       aes(x = region, y = percent)) +
  geom_col(fill = I("#00931d")) +
  labs(x = "", y = "Percent", title = "Share of total manufacturing energy used in food processing\nfor the top 10 regions in 2015, IEA data") + 
  theme_classic() + 
  theme(text = element_text(size = 24),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/ind_energy_frac_foodpro_GCAM_reg_2015_top_10_IEA.png"), fig, width = 12, height = 8)

# plot for all regions
sel_data_2 <- IEA_ind_sectors_GCAM_region_total_en %>% 
  filter(FLOW == "FOODPRO" & year == 2015) %>%
  left_join(GCAM_region_names) %>%
  arrange(desc(frac)) %>%
  mutate(percent = frac * 100)
sel_data_2 <- sel_data_2 %>% mutate(region = factor(region, levels = unique(sel_data_2$region)))
fig <- ggplot(sel_data_2,
       aes(x = region, y = percent)) +
  geom_col(fill = I("#00931d")) +
  labs(x = "", y = "Percent", title = "Share of total manufacturing energy used in food processing in 2015 (IEA raw data)") + 
  scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30)) +
  theme_classic() + 
  theme(text = element_text(size = 12),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/ind_energy_frac_foodpro_GCAM_reg_2015_IEA.png"), fig, width = 12, height = 8)

Food processing energy use by fuel in the IEA data.

fig <- ggplot(IEA_ind_sectors_region_fuel %>% filter(FLOW == "FOODPRO"), 
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~country_name, scale = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_country_fuel_hist_IEA_all.png"), fig, width = 35, height = 20)

# also plot all data at GCAM region level
fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO"), 
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~region, scale = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_all.png"), fig, width = 35, height = 20)

fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO" & year == 2015), 
       aes(x = region, y = value, fill = fuel)) +
  geom_col() + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use in 2015 (IEA raw data)") + 
  theme_classic() + 
  theme(text = element_text(size = 12),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_all_2015.png"), fig, width = 12, height = 8)

Also plot non-specified industry energy use by fuel in the IEA data for GCAM regions.

fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "INONSPEC"), 
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~region, scale = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Nonspecified industry energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/inonspec_energy_use_by_GCAM_region_fuel_hist_IEA_all.png"), fig, width = 35, height = 20)

Calculating relationships using the gcamdata processed food consumption data

Identify the data to use in calculating these relationships: regions and years from 1990 onwards in which the fraction of total industry energy in non-specified industry energy use is less than 0.5 and the fraction in food processing is greater than 0.01, or even if the non-specified industry fraction is high, the food processing fraction is greater than 0.1. These criteria are used because if the IEA does not know the industrial sector breakdown of use of a particular fuel for a given country, it just categorizes all of that fuel use as non-specified industry fuel use. Thus, regions with high fractions of their industry energy use in non-specified industry likely do not have accurate energy use profiles for the other industrial sectors; this is particularly a problem for food processing, as we expect there to be at least some energy used for food processing in all regions.

IEA_ind_sectors_GCAM_region_total_en_frac_wider <- IEA_ind_sectors_GCAM_region_total_en %>%
  dplyr::select(-value) %>%
  pivot_wider(names_from = FLOW, values_from = frac) %>%
  left_join(GCAM_region_names)

# get regions and years to include 
GCAM_reg_years_incl <- IEA_ind_sectors_GCAM_region_total_en_frac_wider %>%
  # select only desired years past the first start year in which INONSPEC fraction is not too high and food processing fraction is not too low, or in which food processing fraction is high even if INONSPEC fraction is high
  filter(year >= min_year & ((INONSPEC < max_frac_inonspec & (!is.na(FOODPRO) & FOODPRO > min_frac_foodpro)) | FOODPRO > override_frac_foodpro)) %>%
  dplyr::select(GCAM_region_ID, year) %>%
  left_join(GCAM_region_names)

fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO") %>% right_join(GCAM_reg_years_incl), 
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~region, scale = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use (IEA), selected data") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
fig

ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_sel.png"), fig + theme(text = element_text(size = 30)), width = 35, height = 20)

Join the energy data to the food data and filter to just the regions and years that satisfy the criteria for “higher quality” data, that we will use to calculate the relationships between food processing energy use and food consumption.

IEA_foodpro_region_total_en <- IEA_ind_sectors_region_total_en %>%
  filter(FLOW == "FOODPRO")

# combine with energy and GDP data on a GCAM-region level
comb_data_sel_GCAM_reg_gcamdata <- IEA_foodpro_region_total_en %>%
  group_by(GCAM_region_ID, year) %>%
  summarize(EJ = sum(value)) %>%
  ungroup() %>%
  full_join(total_cal_food_gcamdata_GCAM_reg) %>%
  full_join(staples_ns_cal_food_gcamdata_GCAM_reg) %>%
  left_join(GDP_pcap_gcamdata_sel) %>%
  left_join(GDP_gcamdata_sel) %>%
  filter(year %in% all_IEA_years) %>%
  left_join(GCAM_region_names)

# filter to the data to include in the relationships calculation
comb_data_sel_GCAM_reg_gcamdata_final <- comb_data_sel_GCAM_reg_gcamdata %>%
  right_join(GCAM_reg_years_incl) %>%
  filter(!is.na(PKcal),
         PKcal > 0)

# filter to the data to infill
comb_data_sel_GCAM_reg_gcamdata_to_infill <- comb_data_sel_GCAM_reg_gcamdata %>%
  anti_join(GCAM_reg_years_incl)

Test various models and relationships.

# function to get relevant data from a linear model and print the summary
get_lm_data <- function(lm_model) {
  print(summary(lm_model))
  
  coefs_all <- data.frame(coef = unname(lm_model$coefficients),
                          pval = unname(summary(lm_model)$coef[,4]),
                          label = names(lm_model$coefficients)) %>%
  mutate(region = ifelse(grepl("region", label), substr(label, 7, nchar(label)), NA))
  
  coefs_input_vars <- coefs_all %>% 
    filter(is.na(region)) %>% 
    dplyr::select(-region) %>%
    mutate(is_sig = pval <= max_pval)
  
  regional_intercepts <- coefs_all %>%
    filter(!is.na(region)) %>%
    dplyr::select(-label) %>%
    rename(intercept = coef) %>%
    # include a variable indicating the intercept only if the intercept is significant
    mutate(intercept_if_sig = ifelse(pval <= max_pval, intercept, 0))
  
  return(list(coefs_input_vars, regional_intercepts))
}

# function to run a variety of test linear models and use their results to estimate food processing energy use data
# used_data is the data to actually use in calculating the relationships, infill_data is the data for regions/years to infill
run_models_calc_infill <- function(used_data, infill_data) {
  # run the function on the models to test to get their data
  # models with total calories
  m_PKcal_1 <- get_lm_data(lm(EJ ~ 0 + PKcal, used_data))
  PKcal_slope_1 <- (m_PKcal_1[[1]] %>% filter(label == "PKcal"))$coef[1]

  m_PKcal_2 <- get_lm_data(lm(EJ ~ PKcal, used_data))
  PKcal_slope_2 <- (m_PKcal_2[[1]] %>% filter(label == "PKcal"))$coef[1]
  overall_intercept_PKcal_2 <- (m_PKcal_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_PKcal_sig_2 <- (m_PKcal_2[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]

  m_PKcal_3 <- get_lm_data(lm(EJ ~ 0 + PKcal + region, used_data))
  PKcal_slope_3 <- (m_PKcal_3[[1]] %>% filter(label == "PKcal"))$coef[1]
  region_intercepts_PKcal_3 <- m_PKcal_3[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
    rename_with(~paste0(.x, "_PKcal_3", recycle0 = TRUE), contains("intercept"))

  m_PKcal_4 <- get_lm_data(lm(EJ ~ PKcal + region, used_data))
  PKcal_slope_4 <- (m_PKcal_4[[1]] %>% filter(label == "PKcal"))$coef[1]
  overall_intercept_PKcal_4 <- (m_PKcal_4[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_PKcal_sig_4 <- (m_PKcal_4[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
  region_intercepts_PKcal_4 <- m_PKcal_4[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
    rename_with(~paste0(.x, "_PKcal_4", recycle0 = TRUE), contains("intercept"))
  
  m_PKcal_5 <- get_lm_data(lm(EJ ~ PKcal + GDP, used_data))
  PKcal_slope_5 <- (m_PKcal_5[[1]] %>% filter(label == "PKcal"))$coef[1]
  GDP_slope_5 <- (m_PKcal_5[[1]] %>% filter(label == "GDP"))$coef[1]
  overall_intercept_PKcal_5 <- (m_PKcal_5[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_PKcal_sig_5 <- (m_PKcal_5[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
  
  m_PKcal_6 <- get_lm_data(lm(EJ ~ PKcal + GDP + region, used_data))
  PKcal_slope_6 <- (m_PKcal_6[[1]] %>% filter(label == "PKcal"))$coef[1]
  GDP_slope_6 <- (m_PKcal_6[[1]] %>% filter(label == "GDP"))$coef[1]
  overall_intercept_PKcal_6 <- (m_PKcal_6[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_PKcal_sig_6 <- (m_PKcal_6[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
  region_intercepts_PKcal_6 <- m_PKcal_6[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
    rename_with(~paste0(.x, "_PKcal_6", recycle0 = TRUE), contains("intercept"))

  # models with nonstaples calories only
  m_nonstaples_1 <- get_lm_data(lm(EJ ~ 0 + nonstaples, used_data))
  nonstaples_slope_1 <- (m_nonstaples_1[[1]] %>% filter(label == "nonstaples"))$coef[1]

  m_nonstaples_2 <- get_lm_data(lm(EJ ~ nonstaples, used_data))
  nonstaples_slope_2 <- (m_nonstaples_2[[1]] %>% filter(label == "nonstaples"))$coef[1]
  overall_intercept_nonstaples_2 <- (m_nonstaples_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_nonstaples_sig_2 <- (m_nonstaples_2[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]

  m_nonstaples_3 <- get_lm_data(lm(EJ ~ 0 + nonstaples + region, used_data))
  nonstaples_slope_3 <- (m_nonstaples_3[[1]] %>% filter(label == "nonstaples"))$coef[1]
  region_intercepts_nonstaples_3 <- m_nonstaples_3[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
    rename_with(~paste0(.x, "_nonstaples_3", recycle0 = TRUE), contains("intercept"))

  m_nonstaples_4 <- get_lm_data(lm(EJ ~ nonstaples + region, used_data))
  nonstaples_slope_4 <- (m_nonstaples_4[[1]] %>% filter(label == "nonstaples"))$coef[1]
  overall_intercept_nonstaples_4 <- (m_nonstaples_4[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_nonstaples_sig_4 <- (m_nonstaples_4[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
  region_intercepts_nonstaples_4 <- m_nonstaples_4[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
    rename_with(~paste0(.x, "_nonstaples_4", recycle0 = TRUE), contains("intercept"))
  
  m_nonstaples_5 <- get_lm_data(lm(EJ ~ nonstaples + GDP, used_data))
  nonstaples_slope_5 <- (m_nonstaples_5[[1]] %>% filter(label == "nonstaples"))$coef[1]
  GDP_slope_5 <- (m_nonstaples_5[[1]] %>% filter(label == "GDP"))$coef[1]
  overall_intercept_nonstaples_5 <- (m_nonstaples_5[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_nonstaples_sig_5 <- (m_nonstaples_5[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
  
  m_nonstaples_6 <- get_lm_data(lm(EJ ~ nonstaples + GDP + region, used_data))
  nonstaples_slope_6 <- (m_nonstaples_6[[1]] %>% filter(label == "nonstaples"))$coef[1]
  GDP_slope_6 <- (m_nonstaples_6[[1]] %>% filter(label == "GDP"))$coef[1]
  overall_intercept_nonstaples_6 <- (m_nonstaples_6[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  overall_intercept_nonstaples_sig_6 <- (m_nonstaples_6[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
  region_intercepts_nonstaples_6 <- m_nonstaples_6[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
    rename_with(~paste0(.x, "_nonstaples_6", recycle0 = TRUE), contains("intercept"))

  # models with both staples and nonstaples calories
  m_nonstaples_staples_1 <- get_lm_data(lm(EJ ~ 0 + nonstaples + staples, used_data))
  nonstaples_staples_ns_slope_1 <- (m_nonstaples_staples_1[[1]] %>% filter(label == "nonstaples"))$coef[1]
  nonstaples_staples_s_slope_1 <- (m_nonstaples_staples_1[[1]] %>% filter(label == "staples"))$coef[1]

  m_nonstaples_staples_2 <- get_lm_data(lm(EJ ~ nonstaples + staples, used_data))
  nonstaples_staples_ns_slope_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "nonstaples"))$coef[1]
  nonstaples_staples_s_slope_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "staples"))$coef[1]
  overall_intercept_nonstaples_staples_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
  
  # calculate the food processing energy use based on each of these models, both for regions with good data (to see how the models compare) and for regions without good data
  hist_and_infilled_test_methods <- infill_data %>% 
    mutate(orig_data = FALSE) %>%
    rbind(used_data %>% mutate(orig_data = TRUE)) %>%
    left_join(region_intercepts_PKcal_3) %>%
    left_join(region_intercepts_PKcal_4) %>%
    left_join(region_intercepts_PKcal_6) %>%
    left_join(region_intercepts_nonstaples_3) %>%
    left_join(region_intercepts_nonstaples_4) %>%
    left_join(region_intercepts_nonstaples_6) %>%
    mutate(across(contains("intercept"), ~ replace_na(.x, 0))) %>%
    mutate(energy_est_PKcal_1 = PKcal * PKcal_slope_1,
           energy_est_PKcal_2 = PKcal * PKcal_slope_2 + overall_intercept_PKcal_2,
           energy_est_PKcal_3 = PKcal * PKcal_slope_3 + intercept_PKcal_3,
           energy_est_PKcal_4 = PKcal * PKcal_slope_4 + overall_intercept_PKcal_4 + intercept_PKcal_4,
           energy_est_PKcal_5 = PKcal * PKcal_slope_5 + GDP * GDP_slope_5 + overall_intercept_PKcal_5,
           energy_est_PKcal_6 = PKcal * PKcal_slope_6 + GDP * GDP_slope_6 + overall_intercept_PKcal_6 + intercept_PKcal_6,
           energy_est_nonstaples_1 = nonstaples * nonstaples_slope_1,
           energy_est_nonstaples_2 = nonstaples * nonstaples_slope_2 + overall_intercept_nonstaples_2,
           energy_est_nonstaples_3 = nonstaples * nonstaples_slope_3 + intercept_nonstaples_3,
           energy_est_nonstaples_4 = nonstaples * nonstaples_slope_4 + overall_intercept_nonstaples_4 + intercept_nonstaples_4,
           energy_est_nonstaples_5 = nonstaples * nonstaples_slope_5 + GDP * GDP_slope_5 + overall_intercept_nonstaples_5,
           energy_est_nonstaples_6 = nonstaples * nonstaples_slope_6 + GDP * GDP_slope_6 + overall_intercept_nonstaples_6 + intercept_nonstaples_6,
           energy_est_nonstaples_staples_1 = nonstaples * nonstaples_staples_ns_slope_1 + staples * nonstaples_staples_s_slope_1,
           energy_est_nonstaples_staples_2 = nonstaples * nonstaples_staples_ns_slope_2 + staples * nonstaples_staples_s_slope_2 + overall_intercept_nonstaples_staples_2)
  
  return(hist_and_infilled_test_methods)
}

# just going to infill from the minimum year onwards for now
hist_and_infilled_test_methods_gcamdata <- run_models_calc_infill(comb_data_sel_GCAM_reg_gcamdata_final %>%
                                                                    rename(GDP = GDP_mil90USD), comb_data_sel_GCAM_reg_gcamdata_to_infill %>% 
                                                                    filter(year >= min_year) %>% 
                                                                    rename(GDP = GDP_mil90USD)) %>%
  rename(GDP_mil90USD = GDP)
## 
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83401 -0.01201  0.01547  0.12236  0.83721 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)    
## PKcal  1.20918    0.03442   35.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2698 on 435 degrees of freedom
## Multiple R-squared:  0.7394, Adjusted R-squared:  0.7388 
## F-statistic:  1234 on 1 and 435 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72022 -0.12001 -0.09853  0.02513  0.78285 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.12081    0.01426   8.471 3.81e-16 ***
## PKcal        1.03461    0.03799  27.231  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2502 on 434 degrees of freedom
## Multiple R-squared:  0.6308, Adjusted R-squared:   0.63 
## F-statistic: 741.5 on 1 and 434 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63328 -0.01452  0.00111  0.02238  0.29543 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## PKcal                                  2.2590434  0.1040050  21.721  < 2e-16
## regionArgentina                        0.0104071  0.1020965   0.102   0.9189
## regionAustralia_NZ                     0.0849380  0.0201679   4.212 3.11e-05
## regionBrazil                           0.2702093  0.0276846   9.760  < 2e-16
## regionCanada                          -0.0332550  0.0310324  -1.072   0.2845
## regionCentral Asia                    -0.1646734  0.0727576  -2.263   0.0241
## regionChina                           -1.8350921  0.1410253 -13.012  < 2e-16
## regionColombia                        -0.0243059  0.0203270  -1.196   0.2325
## regionEU-12                           -0.0504720  0.0235024  -2.148   0.0323
## regionEU-15                            0.0161341  0.0507530   0.318   0.7507
## regionEurope_Eastern                  -0.0459866  0.0301992  -1.523   0.1286
## regionEurope_Non_EU                   -0.1692711  0.0229329  -7.381 8.65e-13
## regionEuropean Free Trade Association -0.0004314  0.0200577  -0.022   0.9829
## regionJapan                           -0.0357243  0.0232517  -1.536   0.1252
## regionMexico                          -0.1515020  0.0230081  -6.585 1.38e-10
## regionRussia                           0.0151043  0.0262003   0.576   0.5646
## regionSouth America_Northern          -0.0356232  0.0205331  -1.735   0.0835
## regionSouth Korea                     -0.0399617  0.0206218  -1.938   0.0533
## regionSoutheast Asia                  -0.3526609  0.0376423  -9.369  < 2e-16
## regionTaiwan                          -0.0248699  0.0201395  -1.235   0.2176
## regionUSA                              0.1857237  0.0423708   4.383 1.48e-05
##                                          
## PKcal                                 ***
## regionArgentina                          
## regionAustralia_NZ                    ***
## regionBrazil                          ***
## regionCanada                             
## regionCentral Asia                    *  
## regionChina                           ***
## regionColombia                           
## regionEU-12                           *  
## regionEU-15                              
## regionEurope_Eastern                     
## regionEurope_Non_EU                   ***
## regionEuropean Free Trade Association    
## regionJapan                              
## regionMexico                          ***
## regionRussia                             
## regionSouth America_Northern          .  
## regionSouth Korea                     .  
## regionSoutheast Asia                  ***
## regionTaiwan                             
## regionUSA                             ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.102 on 415 degrees of freedom
## Multiple R-squared:  0.9645, Adjusted R-squared:  0.9627 
## F-statistic: 536.6 on 21 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63328 -0.01452  0.00111  0.02238  0.29543 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.010407   0.102097   0.102 0.918858    
## PKcal                                  2.259043   0.104005  21.721  < 2e-16 ***
## regionAustralia_NZ                     0.074531   0.103942   0.717 0.473749    
## regionBrazil                           0.259802   0.104858   2.478 0.013622 *  
## regionCanada                          -0.043662   0.106507  -0.410 0.682058    
## regionCentral Asia                    -0.175080   0.124973  -1.401 0.161977    
## regionChina                           -1.845499   0.169972 -10.858  < 2e-16 ***
## regionColombia                        -0.034713   0.103922  -0.334 0.738526    
## regionEU-12                           -0.060879   0.104165  -0.584 0.559236    
## regionEU-15                            0.005727   0.111914   0.051 0.959212    
## regionEurope_Eastern                  -0.056394   0.106146  -0.531 0.595507    
## regionEurope_Non_EU                   -0.179678   0.104093  -1.726 0.085068 .  
## regionEuropean Free Trade Association -0.010839   0.103972  -0.104 0.917026    
## regionJapan                           -0.046131   0.104132  -0.443 0.657991    
## regionMexico                          -0.161909   0.104102  -1.555 0.120639    
## regionRussia                           0.004697   0.104633   0.045 0.964215    
## regionSouth America_Northern          -0.046030   0.104024  -0.442 0.658360    
## regionSouth Korea                     -0.050369   0.103912  -0.485 0.628127    
## regionSoutheast Asia                  -0.363068   0.107352  -3.382 0.000788 ***
## regionTaiwan                          -0.035277   0.103947  -0.339 0.734499    
## regionUSA                              0.175317   0.108806   1.611 0.107879    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.102 on 415 degrees of freedom
## Multiple R-squared:  0.9414, Adjusted R-squared:  0.9385 
## F-statistic: 333.1 on 20 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48117 -0.06333 -0.03162  0.04319  0.68040 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.593e-02  9.609e-03   3.739  0.00021 ***
## PKcal       7.717e-01  2.613e-02  29.532  < 2e-16 ***
## GDP         7.458e-08  2.919e-09  25.550  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1582 on 433 degrees of freedom
## Multiple R-squared:  0.8528, Adjusted R-squared:  0.8521 
## F-statistic:  1254 on 2 and 433 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45897 -0.01763  0.00077  0.02210  0.29694 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            2.861e-02  9.091e-02   0.315  0.75316
## PKcal                                  1.263e+00  1.327e-01   9.513  < 2e-16
## GDP                                    1.009e-07  9.640e-09  10.472  < 2e-16
## regionAustralia_NZ                     9.002e-03  9.274e-02   0.097  0.92272
## regionBrazil                           3.093e-01  9.347e-02   3.309  0.00102
## regionCanada                          -1.314e-01  9.519e-02  -1.381  0.16814
## regionCentral Asia                    -1.258e-01  1.114e-01  -1.129  0.25936
## regionChina                           -7.837e-01  1.821e-01  -4.302 2.11e-05
## regionColombia                        -3.334e-02  9.251e-02  -0.360  0.71873
## regionEU-12                           -2.970e-02  9.278e-02  -0.320  0.74906
## regionEU-15                           -5.004e-01  1.107e-01  -4.519 8.13e-06
## regionEurope_Eastern                  -2.308e-02  9.455e-02  -0.244  0.80728
## regionEurope_Non_EU                   -1.394e-01  9.275e-02  -1.503  0.13371
## regionEuropean Free Trade Association -7.401e-02  9.276e-02  -0.798  0.42541
## regionJapan                           -3.116e-01  9.610e-02  -3.242  0.00128
## regionMexico                          -1.317e-01  9.272e-02  -1.421  0.15611
## regionRussia                           5.558e-02  9.327e-02   0.596  0.55157
## regionSouth America_Northern          -5.779e-02  9.261e-02  -0.624  0.53293
## regionSouth Korea                     -7.418e-02  9.253e-02  -0.802  0.42318
## regionSoutheast Asia                  -1.432e-01  9.785e-02  -1.464  0.14397
## regionTaiwan                          -5.267e-02  9.255e-02  -0.569  0.56960
## regionUSA                             -3.553e-01  1.093e-01  -3.250  0.00125
##                                          
## (Intercept)                              
## PKcal                                 ***
## GDP                                   ***
## regionAustralia_NZ                       
## regionBrazil                          ** 
## regionCanada                             
## regionCentral Asia                       
## regionChina                           ***
## regionColombia                           
## regionEU-12                              
## regionEU-15                           ***
## regionEurope_Eastern                     
## regionEurope_Non_EU                      
## regionEuropean Free Trade Association    
## regionJapan                           ** 
## regionMexico                             
## regionRussia                             
## regionSouth America_Northern             
## regionSouth Korea                        
## regionSoutheast Asia                     
## regionTaiwan                             
## regionUSA                             ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09078 on 414 degrees of freedom
## Multiple R-squared:  0.9536, Adjusted R-squared:  0.9513 
## F-statistic: 405.5 on 21 and 414 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72557 -0.01664  0.00731  0.08833  0.57891 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## nonstaples   2.9998     0.0536   55.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1845 on 435 degrees of freedom
## Multiple R-squared:  0.8781, Adjusted R-squared:  0.8778 
## F-statistic:  3132 on 1 and 435 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.67429 -0.07662 -0.05151  0.04074  0.54457 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.06500    0.01044   6.227 1.12e-09 ***
## nonstaples   2.76976    0.06331  43.751  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.177 on 434 degrees of freedom
## Multiple R-squared:  0.8152, Adjusted R-squared:  0.8148 
## F-statistic:  1914 on 1 and 434 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63050 -0.01336  0.00128  0.01962  0.25856 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## nonstaples                             2.636533   0.118167  22.312  < 2e-16 ***
## regionArgentina                        0.043457   0.100565   0.432 0.665874    
## regionAustralia_NZ                     0.098035   0.019808   4.949 1.08e-06 ***
## regionBrazil                           0.413260   0.023194  17.817  < 2e-16 ***
## regionCanada                          -0.008711   0.030452  -0.286 0.774972    
## regionCentral Asia                    -0.072501   0.071268  -1.017 0.309601    
## regionChina                           -0.133161   0.062795  -2.121 0.034550 *  
## regionColombia                         0.005662   0.019833   0.285 0.775426    
## regionEU-12                            0.053605   0.021038   2.548 0.011195 *  
## regionEU-15                            0.278114   0.039014   7.129 4.53e-12 ***
## regionEurope_Eastern                   0.019494   0.029240   0.667 0.505345    
## regionEurope_Non_EU                   -0.045674   0.020434  -2.235 0.025935 *  
## regionEuropean Free Trade Association  0.006866   0.019746   0.348 0.728227    
## regionJapan                            0.078740   0.020729   3.798 0.000167 ***
## regionMexico                          -0.037015   0.020588  -1.798 0.072914 .  
## regionRussia                           0.177063   0.022105   8.010 1.17e-14 ***
## regionSouth America_Northern          -0.012984   0.020143  -0.645 0.519554    
## regionSouth Korea                      0.016224   0.019854   0.817 0.414301    
## regionSoutheast Asia                   0.049630   0.024254   2.046 0.041357 *  
## regionTaiwan                          -0.011025   0.019784  -0.557 0.577656    
## regionUSA                              0.385012   0.033779  11.398  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1005 on 415 degrees of freedom
## Multiple R-squared:  0.9655, Adjusted R-squared:  0.9637 
## F-statistic: 552.9 on 21 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63050 -0.01336  0.00128  0.01962  0.25856 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.043457   0.100565   0.432  0.66587    
## nonstaples                             2.636533   0.118167  22.312  < 2e-16 ***
## regionAustralia_NZ                     0.054578   0.102430   0.533  0.59444    
## regionBrazil                           0.369804   0.102792   3.598  0.00036 ***
## regionCanada                          -0.052168   0.104975  -0.497  0.61948    
## regionCentral Asia                    -0.115958   0.123107  -0.942  0.34678    
## regionChina                           -0.176618   0.116799  -1.512  0.13126    
## regionColombia                        -0.037795   0.102427  -0.369  0.71232    
## regionEU-12                            0.010148   0.102493   0.099  0.92118    
## regionEU-15                            0.234658   0.106777   2.198  0.02853 *  
## regionEurope_Eastern                  -0.023963   0.104609  -0.229  0.81892    
## regionEurope_Non_EU                   -0.089130   0.102437  -0.870  0.38475    
## regionEuropean Free Trade Association -0.036591   0.102445  -0.357  0.72114    
## regionJapan                            0.035283   0.102461   0.344  0.73076    
## regionMexico                          -0.080472   0.102449  -0.785  0.43262    
## regionRussia                           0.133606   0.102687   1.301  0.19395    
## regionSouth America_Northern          -0.056441   0.102518  -0.551  0.58224    
## regionSouth Korea                     -0.027233   0.102425  -0.266  0.79047    
## regionSoutheast Asia                   0.006173   0.103037   0.060  0.95225    
## regionTaiwan                          -0.054481   0.102434  -0.532  0.59510    
## regionUSA                              0.341556   0.105184   3.247  0.00126 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1005 on 415 degrees of freedom
## Multiple R-squared:  0.943,  Adjusted R-squared:  0.9403 
## F-statistic: 343.4 on 20 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49336 -0.06300 -0.03302  0.04267  0.58707 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.564e-02  8.657e-03   4.117 4.59e-05 ***
## nonstaples  2.184e+00  6.408e-02  34.079  < 2e-16 ***
## GDP         4.620e-08  3.039e-09  15.199  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1431 on 433 degrees of freedom
## Multiple R-squared:  0.8795, Adjusted R-squared:  0.8789 
## F-statistic:  1580 on 2 and 433 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46273 -0.01677  0.00076  0.02120  0.26115 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            4.672e-02  9.006e-02   0.519  0.60417
## nonstaples                             1.519e+00  1.525e-01   9.958  < 2e-16
## GDP                                    9.774e-08  9.609e-09  10.171  < 2e-16
## regionAustralia_NZ                    -2.453e-04  9.189e-02  -0.003  0.99787
## regionBrazil                           3.701e-01  9.206e-02   4.021  6.9e-05
## regionCanada                          -1.335e-01  9.435e-02  -1.415  0.15792
## regionCentral Asia                    -9.359e-02  1.103e-01  -0.849  0.39653
## regionChina                            1.350e-01  1.090e-01   1.239  0.21622
## regionColombia                        -3.506e-02  9.173e-02  -0.382  0.70247
## regionEU-12                            9.742e-03  9.179e-02   0.106  0.91553
## regionEU-15                           -3.552e-01  1.118e-01  -3.176  0.00160
## regionEurope_Eastern                  -5.563e-03  9.370e-02  -0.059  0.95269
## regionEurope_Non_EU                   -8.889e-02  9.174e-02  -0.969  0.33315
## regionEuropean Free Trade Association -8.660e-02  9.188e-02  -0.943  0.34646
## regionJapan                           -2.567e-01  9.614e-02  -2.670  0.00788
## regionMexico                          -8.621e-02  9.175e-02  -0.940  0.34798
## regionRussia                           1.275e-01  9.196e-02   1.386  0.16636
## regionSouth America_Northern          -6.324e-02  9.181e-02  -0.689  0.49136
## regionSouth Korea                     -6.009e-02  9.178e-02  -0.655  0.51299
## regionSoutheast Asia                   6.075e-02  9.243e-02   0.657  0.51135
## regionTaiwan                          -6.300e-02  9.174e-02  -0.687  0.49265
## regionUSA                             -2.449e-01  1.104e-01  -2.217  0.02713
##                                          
## (Intercept)                              
## nonstaples                            ***
## GDP                                   ***
## regionAustralia_NZ                       
## regionBrazil                          ***
## regionCanada                             
## regionCentral Asia                       
## regionChina                              
## regionColombia                           
## regionEU-12                              
## regionEU-15                           ** 
## regionEurope_Eastern                     
## regionEurope_Non_EU                      
## regionEuropean Free Trade Association    
## regionJapan                           ** 
## regionMexico                             
## regionRussia                             
## regionSouth America_Northern             
## regionSouth Korea                        
## regionSoutheast Asia                     
## regionTaiwan                             
## regionUSA                             *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09001 on 414 degrees of freedom
## Multiple R-squared:  0.9544, Adjusted R-squared:  0.9521 
## F-statistic: 412.8 on 21 and 414 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples + staples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70081 -0.02672  0.00531  0.07790  0.55103 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## nonstaples  4.28248    0.10547   40.60   <2e-16 ***
## staples    -1.06518    0.07918  -13.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1552 on 434 degrees of freedom
## Multiple R-squared:  0.9139, Adjusted R-squared:  0.9135 
## F-statistic:  2305 on 2 and 434 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + staples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60898 -0.07214 -0.03738  0.04320  0.51852 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.051990   0.008874   5.858 9.26e-09 ***
## nonstaples   4.038462   0.109841  36.766  < 2e-16 ***
## staples     -1.015324   0.076776 -13.224  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1496 on 433 degrees of freedom
## Multiple R-squared:  0.8683, Adjusted R-squared:  0.8677 
## F-statistic:  1428 on 2 and 433 DF,  p-value: < 2.2e-16

Compare to using per capita GDP and log of GDP in the models, just showing that total GDP is a better predictor.

summary(lm(EJ ~ PKcal + GDP_pcap_thous90USD, comb_data_sel_GCAM_reg_gcamdata_final))
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP_pcap_thous90USD, data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69990 -0.10570 -0.06653  0.04900  0.68536 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.0312329  0.0192851    1.62    0.106    
## PKcal               1.0796270  0.0369212   29.24  < 2e-16 ***
## GDP_pcap_thous90USD 0.0056956  0.0008683    6.56 1.54e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2389 on 433 degrees of freedom
## Multiple R-squared:  0.6642, Adjusted R-squared:  0.6626 
## F-statistic: 428.2 on 2 and 433 DF,  p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + GDP_pcap_thous90USD + region, comb_data_sel_GCAM_reg_gcamdata_final))
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP_pcap_thous90USD + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59439 -0.03092  0.00313  0.03839  0.29984 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           -0.051684   0.100844  -0.513  0.60856    
## PKcal                                  2.146238   0.104861  20.467  < 2e-16 ***
## GDP_pcap_thous90USD                    0.009621   0.002167   4.440 1.16e-05 ***
## regionAustralia_NZ                    -0.144253   0.112986  -1.277  0.20241    
## regionBrazil                           0.281827   0.102691   2.744  0.00633 ** 
## regionCanada                          -0.283871   0.117394  -2.418  0.01603 *  
## regionCentral Asia                    -0.130503   0.122659  -1.064  0.28797    
## regionChina                           -1.650144   0.171989  -9.594  < 2e-16 ***
## regionColombia                        -0.003863   0.101893  -0.038  0.96978    
## regionEU-12                           -0.048614   0.101931  -0.477  0.63366    
## regionEU-15                           -0.113197   0.112702  -1.004  0.31578    
## regionEurope_Eastern                  -0.007978   0.104402  -0.076  0.93912    
## regionEurope_Non_EU                   -0.157040   0.101950  -1.540  0.12424    
## regionEuropean Free Trade Association -0.407524   0.135375  -3.010  0.00277 ** 
## regionJapan                           -0.241000   0.110915  -2.173  0.03036 *  
## regionMexico                          -0.143076   0.101920  -1.404  0.16112    
## regionRussia                           0.029302   0.102501   0.286  0.77512    
## regionSouth America_Northern          -0.040234   0.101764  -0.395  0.69278    
## regionSouth Korea                     -0.089201   0.102021  -0.874  0.38244    
## regionSoutheast Asia                  -0.282947   0.106550  -2.656  0.00822 ** 
## regionTaiwan                          -0.063842   0.101883  -0.627  0.53125    
## regionUSA                             -0.004658   0.113890  -0.041  0.96740    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09975 on 414 degrees of freedom
## Multiple R-squared:  0.944,  Adjusted R-squared:  0.9412 
## F-statistic: 332.4 on 21 and 414 DF,  p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_mil90USD), comb_data_sel_GCAM_reg_gcamdata_final))
## 
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_mil90USD), data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52183 -0.10943 -0.01533  0.10631  0.55149 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.986116   0.104855  -18.94   <2e-16 ***
## PKcal              0.728334   0.031231   23.32   <2e-16 ***
## log(GDP_mil90USD)  0.159725   0.007911   20.19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1798 on 433 degrees of freedom
## Multiple R-squared:  0.8098, Adjusted R-squared:  0.809 
## F-statistic:   922 on 2 and 433 DF,  p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_pcap_thous90USD), comb_data_sel_GCAM_reg_gcamdata_final))
## 
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_pcap_thous90USD), data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66717 -0.12554 -0.05791  0.08729  0.65957 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.15176    0.02894  -5.244 2.46e-07 ***
## PKcal                     1.16130    0.03605  32.217  < 2e-16 ***
## log(GDP_pcap_thous90USD)  0.11435    0.01090  10.492  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2236 on 433 degrees of freedom
## Multiple R-squared:  0.7056, Adjusted R-squared:  0.7043 
## F-statistic:   519 on 2 and 433 DF,  p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_mil90USD) + region, comb_data_sel_GCAM_reg_gcamdata_final))
## 
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_mil90USD) + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62861 -0.02104  0.00191  0.02300  0.29182 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           -0.42350    0.31788  -1.332  0.18351    
## PKcal                                  2.10088    0.15111  13.903  < 2e-16 ***
## log(GDP_mil90USD)                      0.03499    0.02428   1.441  0.15031    
## regionAustralia_NZ                     0.04163    0.10629   0.392  0.69549    
## regionBrazil                           0.23551    0.10607   2.220  0.02694 *  
## regionCanada                          -0.08974    0.11107  -0.808  0.41958    
## regionCentral Asia                    -0.16200    0.12514  -1.295  0.19619    
## regionChina                           -1.70756    0.19488  -8.762  < 2e-16 ***
## regionColombia                        -0.01151    0.10503  -0.110  0.91276    
## regionEU-12                           -0.07727    0.10465  -0.738  0.46072    
## regionEU-15                           -0.05048    0.11838  -0.426  0.67004    
## regionEurope_Eastern                  -0.02414    0.10835  -0.223  0.82383    
## regionEurope_Non_EU                   -0.18525    0.10403  -1.781  0.07568 .  
## regionEuropean Free Trade Association -0.03931    0.10570  -0.372  0.71013    
## regionJapan                           -0.12215    0.11661  -1.047  0.29548    
## regionMexico                          -0.17540    0.10439  -1.680  0.09365 .  
## regionRussia                          -0.01280    0.10520  -0.122  0.90319    
## regionSouth America_Northern          -0.02778    0.10466  -0.265  0.79077    
## regionSouth Korea                     -0.06800    0.10450  -0.651  0.51558    
## regionSoutheast Asia                  -0.34438    0.10799  -3.189  0.00154 ** 
## regionTaiwan                          -0.02588    0.10402  -0.249  0.80367    
## regionUSA                              0.10781    0.11833   0.911  0.36280    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1018 on 414 degrees of freedom
## Multiple R-squared:  0.9416, Adjusted R-squared:  0.9387 
## F-statistic: 318.1 on 21 and 414 DF,  p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_pcap_thous90USD) + region, comb_data_sel_GCAM_reg_gcamdata_final))
## 
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_pcap_thous90USD) + region, 
##     data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63251 -0.01546  0.00103  0.02196  0.29382 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           -0.011989   0.115215  -0.104   0.9172    
## PKcal                                  2.209666   0.156830  14.090  < 2e-16 ***
## log(GDP_pcap_thous90USD)               0.012725   0.030227   0.421   0.6740    
## regionAustralia_NZ                     0.055239   0.113690   0.486   0.6273    
## regionBrazil                           0.267943   0.106729   2.511   0.0124 *  
## regionCanada                          -0.063324   0.116394  -0.544   0.5867    
## regionCentral Asia                    -0.161690   0.129078  -1.253   0.2110    
## regionChina                           -1.762410   0.260582  -6.763 4.61e-11 ***
## regionColombia                        -0.026919   0.105660  -0.255   0.7990    
## regionEU-12                           -0.056146   0.104873  -0.535   0.5927    
## regionEU-15                            0.009854   0.112453   0.088   0.9302    
## regionEurope_Eastern                  -0.040679   0.112618  -0.361   0.7181    
## regionEurope_Non_EU                   -0.172988   0.105401  -1.641   0.1015    
## regionEuropean Free Trade Association -0.036877   0.121067  -0.305   0.7608    
## regionJapan                           -0.060505   0.109685  -0.552   0.5815    
## regionMexico                          -0.156326   0.105046  -1.488   0.1375    
## regionRussia                           0.012970   0.106565   0.122   0.9032    
## regionSouth America_Northern          -0.045496   0.104135  -0.437   0.6624    
## regionSouth Korea                     -0.055533   0.104736  -0.530   0.5962    
## regionSoutheast Asia                  -0.331653   0.130828  -2.535   0.0116 *  
## regionTaiwan                          -0.040065   0.104670  -0.383   0.7021    
## regionUSA                              0.172524   0.109115   1.581   0.1146    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1021 on 414 degrees of freedom
## Multiple R-squared:  0.9414, Adjusted R-squared:  0.9384 
## F-statistic: 316.6 on 21 and 414 DF,  p-value: < 2.2e-16

Indeed, comparing these models with the previous results, total GDP seems to be a better predictor than log(GDP) or per capita GDP or log(per capita GDP).

Plot the results.

First looking at relationships with total calories, just historical data first. Methods, as labeled by number, are: 1) Linear model with calories (all or nonstaples) and no intercept 2) Linear model with calories (all or nonstaples) with intercept 3) Linear model with calories (all or nonstaples) and no intercept but with regional fixed effects 4) Linear model with calories (all or nonstaples) with intercept and regional fixed effects 5) Linear model with calories (all or nonstaples) and GDP with intercept 6) Linear model with calories (all or nonstaples) and GDP with intercept and regional fixed effects

plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~EJ,
            type = "scatter",
            mode = "markers",
            color = ~region,
            name = ~paste(region, "orig"),
            text = "Original data",
            marker = list(line = list(color = "black", width = 1))) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_1,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            text = "Est 1",
            marker = list(symbol = "cross")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_2,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 2",
            marker = list(symbol = "square")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_3,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 3",
            marker = list(symbol = "diamond")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_4,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 4",
            marker = list(symbol = "star")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_5,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 5",
            marker = list(symbol = "triangle-down")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_6,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 6",
            marker = list(symbol = "triangle-up")) %>%
  layout(xaxis = list(title = "PKcal"),
         yaxis = list(title = "EJ"),
         title = "Energy used in food processing vs calorie consumption")

Generally the fixed effects models are a lot better. GDP does seem to help the predictions, as well. Raw intercept (not regionally-varying) does not.

Now historical data with calories from non-staples as the predictor.

plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~EJ,
            type = "scatter",
            mode = "markers",
            color = ~region,
            name = ~paste(region, "orig"),
            text = "Original data",
            marker = list(line = list(color = "black", width = 1))) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_1,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            text = "Est 1",
            marker = list(symbol = "cross")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_2,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 2",
            marker = list(symbol = "square")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_3,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 3",
            marker = list(symbol = "diamond")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_4,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 4",
            marker = list(symbol = "star")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_5,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 5",
            marker = list(symbol = "triangle-down")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_6,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 6",
            marker = list(symbol = "triangle-up")) %>%
  layout(xaxis = list(title = "PKcal from non-staples"),
         yaxis = list(title = "EJ"),
         title = "Energy used in food processing vs non-staples consumption")

Now plot just for the regions to infill, compared to the historical data for the other regions.

plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~EJ,
            type = "scatter",
            mode = "markers",
            color = ~region,
            name = ~paste(region, "orig"),
            text = "Original data",
            marker = list(line = list(color = "black", width = 1))) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_1,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            text = "Est 1",
            marker = list(symbol = "cross")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_2,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 2",
            marker = list(symbol = "square")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_3,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 3",
            marker = list(symbol = "diamond")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_4,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 4",
            marker = list(symbol = "star")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_5,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 5",
            marker = list(symbol = "triangle-down")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_PKcal_6,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 6",
            marker = list(symbol = "triangle-up")) %>%
  layout(xaxis = list(title = "PKcal"),
         yaxis = list(title = "EJ"),
         title = "Energy used in food processing vs calorie consumption")
plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~nonstaples,
            y = ~EJ,
            type = "scatter",
            mode = "markers",
            color = ~region,
            name = ~paste(region, "orig"),
            text = "Original data",
            marker = list(line = list(color = "black", width = 1))) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_1,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            text = "Est 1",
            marker = list(symbol = "cross")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_2,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 2",
            marker = list(symbol = "square")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_3,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 3",
            marker = list(symbol = "diamond")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_4,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 4",
            marker = list(symbol = "star")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_5,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 5",
            marker = list(symbol = "triangle-down")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~nonstaples,
            y = ~energy_est_nonstaples_6,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 6",
            marker = list(symbol = "triangle-up")) %>%
  layout(xaxis = list(title = "PKcal from non-staples"),
         yaxis = list(title = "EJ"),
         title = "Energy used in food processing vs non-staples consumption")
# plot the nonstaples methods again but with x axis total calories
plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE), 
            x = ~PKcal,
            y = ~EJ,
            type = "scatter",
            mode = "markers",
            color = ~region,
            name = ~paste(region, "orig"),
            text = "Original data",
            marker = list(line = list(color = "black", width = 1))) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_nonstaples_1,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            text = "Est 1",
            marker = list(symbol = "cross")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_nonstaples_2,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 2",
            marker = list(symbol = "square")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_nonstaples_3,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 3",
            marker = list(symbol = "diamond")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_nonstaples_4,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 4",
            marker = list(symbol = "star")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_nonstaples_5,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 5",
            marker = list(symbol = "triangle-down")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE), 
            x = ~PKcal,
            y = ~energy_est_nonstaples_6,
            type = "scatter",
            mode = "markers",
            color = ~region,
            legendgroup = ~region,
            showlegend = FALSE,
            text = "Est 6",
            marker = list(symbol = "triangle-up")) %>%
  layout(xaxis = list(title = "PKcal"),
         yaxis = list(title = "EJ"),
         title = "Energy used in food processing vs calorie consumption (infill calculated with nonstaples)")

From these, for PKcal, infilling without regional fixed effects seems like it makes more sense, or with GDP (with or without fixed effects).

Now plot the predicted values relative to the observed historical values.

hist_and_infilled_test_methods_gcamdata_longer <- hist_and_infilled_test_methods_gcamdata %>%
  dplyr::select(GCAM_region_ID, year, EJ, PKcal, nonstaples, staples, scenario, GDP_pcap_thous90USD, GDP_mil90USD, region, orig_data, energy_est_PKcal_1, energy_est_PKcal_2, energy_est_PKcal_3, energy_est_PKcal_4, energy_est_PKcal_5, energy_est_PKcal_6, energy_est_nonstaples_1, energy_est_nonstaples_2, energy_est_nonstaples_3, energy_est_nonstaples_4, energy_est_nonstaples_5, energy_est_nonstaples_6) %>%
  pivot_longer(cols = c(energy_est_PKcal_1, energy_est_PKcal_2, energy_est_PKcal_3, energy_est_PKcal_4, energy_est_PKcal_5, energy_est_PKcal_6, energy_est_nonstaples_1, energy_est_nonstaples_2, energy_est_nonstaples_3, energy_est_nonstaples_4, energy_est_nonstaples_5, energy_est_nonstaples_6),
               names_to = "label",
               values_to = "value") %>%
  mutate(EJ_per_PKcal_est = value / PKcal,
         EJ_per_PKcal = EJ / PKcal)

pal_pred_plot <- c("darkred", "darkorange", "goldenrod", "gold", "darkgreen", "green4", "blue4", "lightskyblue", "purple4", "purple", "plum", "pink", "black", "darkgray")

# loop through and plot 
for (region_name in unique(hist_and_infilled_test_methods_gcamdata$region)) {
  # select data for this region
  data_sel <- hist_and_infilled_test_methods_gcamdata_longer %>% filter(region == region_name)
  # get min and max values for the plot
  x_min <- min(data_sel$PKcal) - 0.5 * min(data_sel$PKcal)
  x_max <- max(data_sel$PKcal) + 0.5 * max(data_sel$PKcal)
  y_min <- min(data_sel$value) - 0.5 * min(data_sel$value)
  y_max <- max(data_sel$value) + 0.5 * max(data_sel$value)
  
  ggplot() +
    geom_point(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE & region != region_name) %>% mutate(label = "original data, other countries"),
               aes(x = PKcal, y = EJ, color = label)) +
    geom_point(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE & region == region_name) %>% mutate(label = "original data"),
               aes(x = PKcal, y = EJ, color = label)) +
    geom_point(data = data_sel,
               aes(x = PKcal, y = value, color = label)) + 
    scale_color_manual(values = pal_pred_plot) + 
    xlim(x_min, x_max) +
    ylim(y_min, y_max) + 
    labs(x = "PKcal", y = "EJ", title = paste0(region_name, " predicted food processing energy use vs calorie consumption"))
  
  ggsave(paste0(fig_dir, "/food_pro_EJ_pred_comp_methods_", region_name, ".png"), width = 15, height = 10)
}

# plot infilled data
plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
            x = ~PKcal,
            y = ~EJ,
            color = ~region,
            type = "scatter",
            mode = "markers",
            marker = list(line = list(color = I("black"), width = 1),
                          synbol = "circle")) %>%
  add_trace(data = hist_and_infilled_test_methods_gcamdata_longer,
            x = ~PKcal, 
            y = ~value,
            color = ~region,
            symbol = ~label,
            type = "scatter",
            mode = "markers") %>%
  layout(xaxis = list(title = "PKcal"),
         yaxis = list(title = "EJ"),
         title = "Food processing energy use vs food consumption, historical and estimated data with various methods,\nhistorical outlined in black")

Compare the estimated energy per calorie coefficients, with the various methods, for the regions with data to infill relative to the regions with historical data that we used. Just looking at the average coefficients over the whole time series and the 2015 values.

infilled_test_methods_gcamdata_avg_coef <- hist_and_infilled_test_methods_gcamdata_longer %>%
  group_by(GCAM_region_ID, region, label) %>%
  mutate(EJ_per_PKcal_est_mean = mean(EJ_per_PKcal_est, na.rm = TRUE)) %>%
  ungroup()

infilled_test_methods_gcamdata_avg_coef_sel <- infilled_test_methods_gcamdata_avg_coef %>%
  dplyr::select(region, EJ_per_PKcal_mean = EJ_per_PKcal_est_mean, label) %>%
  distinct()

hist_gcamdata_avg_coef <- hist_and_infilled_test_methods_gcamdata %>% 
  filter(orig_data == TRUE) %>%
  mutate(EJ_per_PKcal = EJ / PKcal) %>%
  group_by(GCAM_region_ID, region) %>%
  mutate(EJ_per_PKcal_mean = mean(EJ_per_PKcal)) %>%
  ungroup()

hist_gcamdata_avg_coef_sel <- hist_gcamdata_avg_coef %>% 
  dplyr::select(region, EJ_per_PKcal_mean) %>%
  distinct() %>%
  mutate(label = "historical")

hist_infilled_test_methods_gcamdata_avg_coef_sel <- rbind(infilled_test_methods_gcamdata_avg_coef_sel,
                                                          hist_gcamdata_avg_coef_sel)

# plot average
plot_ly(width = 1000) %>%
  add_trace(data = hist_gcamdata_avg_coef_sel,
            x = ~label,
            y = ~EJ_per_PKcal_mean,
            color = ~region,
            type = "scatter",
            mode = "markers",
            marker = list(line = list(color = I("black"), width = 1),
                          synbol = "circle")) %>%
  add_trace(data = infilled_test_methods_gcamdata_avg_coef_sel,
            x = ~label, 
            y = ~EJ_per_PKcal_mean,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " estimated")) %>%
  layout(xaxis = list(title = "Method"),
         yaxis = list(title = "EJ per PKcal"),
         title = "Average food processing energy use coefficient, historical and estimated data with various methods,\nhistorical outlined in black")
plot_ly(width = 1000) %>%
  add_trace(data = hist_gcamdata_avg_coef_sel,
            x = ~region,
            y = ~EJ_per_PKcal_mean,
            color = ~label,
            colors = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"),
            type = "scatter",
            mode = "markers",
            marker = list(size = 7)) %>%
  add_trace(data = infilled_test_methods_gcamdata_avg_coef_sel,
            x = ~region, 
            y = ~EJ_per_PKcal_mean,
            color = ~label,
            type = "scatter",
            mode = "markers",
            marker = list(size = 5)) %>%
  layout(xaxis = list(title = ""),
         yaxis = list(title = "EJ per PKcal"),
         title = "Average food processing energy use coefficient, historical and estimated data with various methods")
fig <- ggplot(hist_infilled_test_methods_gcamdata_avg_coef_sel,
       aes(x = label, y = EJ_per_PKcal_mean)) +
  geom_point(size = 4) +
  facet_wrap(~region, ncol = 6) + 
  labs(x = "Method", y = "EJ per PKcal", title = "Average food processing energy use coefficient, historical and estimated data with various methods") + 
  theme_bw() +
  theme(text = element_text(size = 24),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))

ggsave(paste0(fig_dir, "/coefs_avg_infilled_test_methods_1.png"), fig, width = 35, height = 20)

fig <- ggplot(hist_infilled_test_methods_gcamdata_avg_coef_sel,
       aes(x = region, y = EJ_per_PKcal_mean, color = label)) +
  geom_point(size = 4) + 
  scale_color_manual(values = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"), name = "Method") + 
  labs(x = "Region", y = "EJ per PKcal", title = "Average food processing energy use coefficient, historical and estimated data with various methods") + 
  theme_bw() +
  theme(text = element_text(size = 24),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))
  
ggsave(paste0(fig_dir, "/coefs_avg_infilled_test_methods_2.png"), fig, width = 35, height = 20)

# plot 2015 values
hist_infilled_test_methods_gcamdata_coef_sel <- hist_gcamdata_avg_coef %>% 
  mutate(label = "historical") %>%
  dplyr::select(GCAM_region_ID, region, year, label, EJ_per_PKcal) %>%
  rbind(infilled_test_methods_gcamdata_avg_coef %>%
          dplyr::select(GCAM_region_ID, region, year, label, EJ_per_PKcal = EJ_per_PKcal_est))
hist_infilled_test_methods_gcamdata_2015_coef_sel <- hist_infilled_test_methods_gcamdata_coef_sel %>%
  filter(year == 2015)

plot_ly(width = 1000) %>%
  add_trace(data = hist_gcamdata_avg_coef %>% 
              filter(year == 2015) %>%
              mutate(label = "historical"),
            x = ~label,
            y = ~EJ_per_PKcal,
            color = ~region,
            type = "scatter",
            mode = "markers",
            marker = list(line = list(color = I("black"), width = 1),
                          symbol = "circle")) %>%
  add_trace(data = infilled_test_methods_gcamdata_avg_coef %>%
              filter(year == 2015),
            x = ~label, 
            y = ~EJ_per_PKcal_est,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " estimated")) %>%
  layout(xaxis = list(title = "Method"),
         yaxis = list(title = "EJ per PKcal"),
         title = "2015 food processing energy use coefficient, historical and estimated data with various methods,\nhistorical outlined in black")
plot_ly(width = 1000) %>%
  add_trace(data = hist_gcamdata_avg_coef %>% 
              filter(year == 2015) %>%
              mutate(label = "historical"),
            x = ~region,
            y = ~EJ_per_PKcal,
            color = ~label,
            colors = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"),
            type = "scatter",
            mode = "markers",
            marker = list(size = 7)) %>%
  add_trace(data = infilled_test_methods_gcamdata_avg_coef %>%
              filter(year == 2015),
            x = ~region, 
            y = ~EJ_per_PKcal_est,
            color = ~label,
            type = "scatter",
            mode = "markers",
            marker = list(size = 5)) %>%
  layout(xaxis = list(title = ""),
         yaxis = list(title = "EJ per PKcal"),
         title = "2015 food processing energy use coefficient, historical and estimated data with various methods")
fig <- ggplot(hist_infilled_test_methods_gcamdata_2015_coef_sel,
       aes(x = label, y = EJ_per_PKcal)) +
  geom_point(size = 4) +
  facet_wrap(~region, ncol = 6) + 
  labs(x = "Method", y = "EJ per PKcal", title = "2015 food processing energy use coefficient, historical and estimated data with various methods") + 
  theme_bw() +
  theme(text = element_text(size = 24),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))

ggsave(paste0(fig_dir, "/coefs_2015_infilled_test_methods_1.png"), fig, width = 35, height = 20)

fig <- ggplot(hist_infilled_test_methods_gcamdata_2015_coef_sel,
       aes(x = region, y = EJ_per_PKcal, color = label)) +
  geom_point(size = 4) + 
  scale_color_manual(values = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"), name = "Method") + 
  labs(x = "Region", y = "EJ per PKcal", title = "2015 food processing energy use coefficient, historical and estimated data with various methods") + 
  theme_bw() +
  theme(text = element_text(size = 24),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))
  
ggsave(paste0(fig_dir, "/coefs_2015_infilled_test_methods_2.png"), fig, width = 35, height = 20)


# plot 2010-2015 values, just for selected methods
fig <- ggplot(hist_infilled_test_methods_gcamdata_coef_sel %>% filter(year >= 2010 & 
                                                                 year <= 2015 & 
                                                                 label %in% c("historical", "energy_est_nonstaples_1",
                                                                              "energy_est_nonstaples_4",
                                                                              "energy_est_nonstaples_5",
                                                                              "energy_est_nonstaples_6")),
       aes(x = region, y = EJ_per_PKcal, color = label)) +
  geom_point(size = 4) + 
  scale_color_manual(values = c("red3",  "darkgreen", "royalblue", "mediumpurple", "black"), name = "Method") + 
  labs(x = "Region", y = "EJ per PKcal", title = "2015 food processing energy use coefficient, historical and estimated data with various methods") + 
  facet_wrap(~year, nrow = 2) + 
  theme_bw() +
  theme(text = element_text(size = 24),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90, vjust = 0, hjust = 1))
  
ggsave(paste0(fig_dir, "/coefs_2010_2015_infilled_test_methods_sel.png"), fig, width = 35, height = 20)

Final relationship to use for infilling

From these plots, and the summaries of the various models, it seems that the model with nonstaples, GDP, and regional fixed effects fits the data best and generates reasonable predictions for infilling data for regions with missing data. Looking at the 2015 values specifically, the coefficients of predicted EJ per total PKcal consumed seem reasonable, and if anything likely to be underestimates for most of the regions that need infilled data, which is the more conservative path to take anyway (i.e., if anything, pulling out slightly too little energy from other industrial energy use - that seems like a better “worst case” than pulling out too much and overestimating food processing energy use).

First output (again) the information for this relationship.

# print final relationship and get values
m_final <- lm(EJ ~ nonstaples + GDP_mil90USD + region, comb_data_sel_GCAM_reg_gcamdata_final)
# print(summary(m_final))
m_final_data <- get_lm_data(m_final)
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP_mil90USD + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46273 -0.01677  0.00076  0.02120  0.26115 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            4.672e-02  9.006e-02   0.519  0.60417
## nonstaples                             1.519e+00  1.525e-01   9.958  < 2e-16
## GDP_mil90USD                           9.774e-08  9.609e-09  10.171  < 2e-16
## regionAustralia_NZ                    -2.453e-04  9.189e-02  -0.003  0.99787
## regionBrazil                           3.701e-01  9.206e-02   4.021  6.9e-05
## regionCanada                          -1.335e-01  9.435e-02  -1.415  0.15792
## regionCentral Asia                    -9.359e-02  1.103e-01  -0.849  0.39653
## regionChina                            1.350e-01  1.090e-01   1.239  0.21622
## regionColombia                        -3.506e-02  9.173e-02  -0.382  0.70247
## regionEU-12                            9.742e-03  9.179e-02   0.106  0.91553
## regionEU-15                           -3.552e-01  1.118e-01  -3.176  0.00160
## regionEurope_Eastern                  -5.563e-03  9.370e-02  -0.059  0.95269
## regionEurope_Non_EU                   -8.889e-02  9.174e-02  -0.969  0.33315
## regionEuropean Free Trade Association -8.660e-02  9.188e-02  -0.943  0.34646
## regionJapan                           -2.567e-01  9.614e-02  -2.670  0.00788
## regionMexico                          -8.621e-02  9.175e-02  -0.940  0.34798
## regionRussia                           1.275e-01  9.196e-02   1.386  0.16636
## regionSouth America_Northern          -6.324e-02  9.181e-02  -0.689  0.49136
## regionSouth Korea                     -6.009e-02  9.178e-02  -0.655  0.51299
## regionSoutheast Asia                   6.075e-02  9.243e-02   0.657  0.51135
## regionTaiwan                          -6.300e-02  9.174e-02  -0.687  0.49265
## regionUSA                             -2.449e-01  1.104e-01  -2.217  0.02713
##                                          
## (Intercept)                              
## nonstaples                            ***
## GDP_mil90USD                          ***
## regionAustralia_NZ                       
## regionBrazil                          ***
## regionCanada                             
## regionCentral Asia                       
## regionChina                              
## regionColombia                           
## regionEU-12                              
## regionEU-15                           ** 
## regionEurope_Eastern                     
## regionEurope_Non_EU                      
## regionEuropean Free Trade Association    
## regionJapan                           ** 
## regionMexico                             
## regionRussia                             
## regionSouth America_Northern             
## regionSouth Korea                        
## regionSoutheast Asia                     
## regionTaiwan                             
## regionUSA                             *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09001 on 414 degrees of freedom
## Multiple R-squared:  0.9544, Adjusted R-squared:  0.9521 
## F-statistic: 412.8 on 21 and 414 DF,  p-value: < 2.2e-16
nonstaples_slope_final <- (m_final_data[[1]] %>% filter(label == "nonstaples"))$coef[1]
GDP_slope_final <- (m_final_data[[1]] %>% filter(label == "GDP_mil90USD"))$coef[1]
overall_intercept_final <- (m_final_data[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_sig_final <- (m_final_data[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_final <- m_final_data[[2]] %>% dplyr::select(region, intercept, intercept_if_sig)

# apply model
hist_and_infilled_final_gcamdata <- comb_data_sel_GCAM_reg_gcamdata_to_infill %>% 
    mutate(orig_data = FALSE) %>%
    rbind(comb_data_sel_GCAM_reg_gcamdata_final %>% mutate(orig_data = TRUE)) %>%
    left_join(region_intercepts_final) %>%
    mutate(across(contains("intercept"), ~ replace_na(.x, 0))) %>%
    mutate(EJ_est = nonstaples * nonstaples_slope_final + GDP_mil90USD * GDP_slope_final + overall_intercept_final + intercept,
           EJ_per_PKcal = EJ / PKcal,
           EJ_per_PKcal_est = EJ_est / PKcal)

Using this relationship, plot the infilled data.

plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
            x = ~PKcal,
            y = ~EJ, 
            color = ~region,
            type = "scatter",
            mode = "markers",
            legendgroup = ~region,
            text = ~year) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
            x = ~PKcal,
            y = ~EJ_est,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " estimated"),
            marker = list(symbol = "cross"),
            legendgroup = ~region,
            text = ~year) %>%
  layout(xaxis = list(title = "PKcal"),
         yaxis = list(title = "EJ"),
         title = "Food processing energy use vs food consumption, 1990-2015\nhistorical and infilled data")
# plot historical data for regions even with not good data
plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
            x = ~PKcal,
            y = ~EJ, 
            color = ~region,
            type = "scatter",
            mode = "markers",
            legendgroup = ~region,
            text = ~year) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
            x = ~PKcal,
            y = ~EJ_est,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " estimated"),
            marker = list(symbol = "cross"),
            legendgroup = ~region,
            text = ~year) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
            x = ~PKcal,
            y = ~EJ,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " original, excluded"),
            marker = list(symbol = "square",
                          line = list(color = I("gray"), width = 1)),
            legendgroup = ~region,
            text = ~year) %>%
  layout(xaxis = list(title = "PKcal"),
         yaxis = list(title = "EJ"),
         title = "Food processing energy use vs food consumption, 1990-2015\nhistorical and infilled data, showing excluded original data")
fig <- ggplot() +
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
             aes(x = PKcal, y = EJ, color = region, shape = I(19))) + 
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
             aes(x = PKcal, y = EJ_est, color = region, shape = I(3))) + 
  scale_color_manual(values = palette36, name = "Region") + 
  labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption, 1990-2015\nhistorical (circles) and infilled (crosses) data") + 
  theme_bw() +
  theme(text = element_text(size = 12),
        strip.background = element_blank())
  
ggsave(paste0(fig_dir, "/food_pro_EJ_PKcal_hist_infill_final.png"), fig, width = 15, height = 8)

fig <- ggplot() +
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
             aes(x = PKcal, y = EJ, color = I("black"), size = I(3))) + 
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
             aes(x = PKcal, y = EJ_est, color = I("royalblue"), size = I(3))) + 
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
             aes(x = PKcal, y = EJ, color = I("mediumpurple1"), size = I(3))) + 
  facet_wrap(~region, nrow = 6, scales = "free") + 
  labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption, 1990-2015\nhistorical data (black for regions where data are used, purple for excluded) and infilled data (blue)") + 
  theme_bw() +
  theme(text = element_text(size = 24),
        strip.background = element_blank())
  
ggsave(paste0(fig_dir, "/food_pro_EJ_PKcal_hist_infill_final_sep_regions.png"), fig, width = 35, height = 30)


# coefficients time series
plot_ly(width = 1000) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
            x = ~year,
            y = ~EJ_per_PKcal, 
            color = ~region,
            type = "scatter",
            mode = "markers",
            legendgroup = ~region,
            text = ~year) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
            x = ~year,
            y = ~EJ_per_PKcal_est,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " estimated"),
            marker = list(symbol = "cross"),
            legendgroup = ~region,
            text = ~year) %>%
  add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
            x = ~year,
            y = ~EJ_per_PKcal,
            color = ~region,
            type = "scatter",
            mode = "markers",
            name = ~paste0(region, " original, excluded"),
            marker = list(symbol = "square",
                          line = list(color = I("gray"), width = 1)),
            legendgroup = ~region,
            text = ~year) %>%
  layout(xaxis = list(title = "Year"),
         yaxis = list(title = "EJ per PKcal"),
         title = "Coefficient of food processing energy use per calorie, 1990-2015\nhistorical and infilled data, showing excluded original data")
fig <- ggplot() +
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 2010),
           aes(x = region, y = EJ_per_PKcal, color = I("black"))) + 
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 2010),
           aes(x = region, y = EJ_per_PKcal, color = I("mediumpurple1"))) +
  geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 2010),
           aes(x = region, y = EJ_per_PKcal_est, color = I("royalblue"))) +
  facet_wrap(~year, nrow = 2) + 
  labs(x = "", y = "EJ per PKcal", title = "Coefficient of food processing energy use per calorie consumed, 1990-2015\nhistorical data (black for regions where data are used, purple for excluded) and infilled data (blue)") + 
  theme_bw() +
  theme(text = element_text(size = 18),
        strip.background = element_blank(),
        axis.text.x = element_text(angle=90, vjust = 0, hjust = 1))

ggsave(paste0(fig_dir, "/coef_2010_2015_food_pro_EJ_PKcal_hist_infill_final.png"), fig, width = 20, height = 12)
  

# calculate the new shares of food processing energy use of total industry energy use with these infilled values
new_foodpro_frac <- IEA_ind_sectors_GCAM_region_total_en %>%
  group_by(year, GCAM_region_ID) %>%
  summarize(total = sum(value)) %>%
  right_join(hist_and_infilled_final_gcamdata %>%
              filter(orig_data == FALSE) %>%
              dplyr::select(year, GCAM_region_ID, EJ_est)) %>%
  mutate(frac = EJ_est / total)

When infilling in gcamdata, we will only infill for regions that fit the criteria to exclude as defined previously and also have energy per calorie coefficients below the minimum “reasonable” value from the data deemed higher quality and used to fit these relationships. That value is printed below.

print(min((hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE))$EJ_per_PKcal))
## [1] 0.4130662

Print all of the values that will be used in gcamdata.

print(paste0("Coefficient of nonstaples consumption (EJ per PKcal): ", round(nonstaples_slope_final, 4)))
## [1] "Coefficient of nonstaples consumption (EJ per PKcal): 1.519"
print(paste0("Coefficient of GDP (EJ per million 1990 USD): ", round(GDP_slope_final, 12)))
## [1] "Coefficient of GDP (EJ per million 1990 USD): 9.7741e-08"
print(paste0("Overall intercept (EJ): ", round(overall_intercept_final, 4)))
## [1] "Overall intercept (EJ): 0.0467"
print("Regional intercepts (EJ):")
## [1] "Regional intercepts (EJ):"
print(region_intercepts_final %>% 
        dplyr::select(region, intercept) %>%
        mutate(intercept = round(intercept, 4)))
##                             region intercept
## 1                     Australia_NZ   -0.0002
## 2                           Brazil    0.3701
## 3                           Canada   -0.1335
## 4                     Central Asia   -0.0936
## 5                            China    0.1350
## 6                         Colombia   -0.0351
## 7                            EU-12    0.0097
## 8                            EU-15   -0.3552
## 9                   Europe_Eastern   -0.0056
## 10                   Europe_Non_EU   -0.0889
## 11 European Free Trade Association   -0.0866
## 12                           Japan   -0.2567
## 13                          Mexico   -0.0862
## 14                          Russia    0.1275
## 15          South America_Northern   -0.0632
## 16                     South Korea   -0.0601
## 17                  Southeast Asia    0.0608
## 18                          Taiwan   -0.0630
## 19                             USA   -0.2449
print("Combined regional intercepts, using the overall intercept for regions without regional intercepts, and adding the overall intercept to the specific regional intercepts (so now only one value needs to be added for all regions):")
## [1] "Combined regional intercepts, using the overall intercept for regions without regional intercepts, and adding the overall intercept to the specific regional intercepts (so now only one value needs to be added for all regions):"
region_intercepts_final_to_save <- GCAM_region_names %>%
  left_join(region_intercepts_final %>%
              dplyr::select(region, intercept)) %>%
  mutate(intercept = round(replace_na(intercept, 0) + overall_intercept_final, 4),
         units = "EJ")
kable(region_intercepts_final_to_save)
GCAM_region_ID region intercept units
1 USA -0.1982 EJ
2 Africa_Eastern 0.0467 EJ
3 Africa_Northern 0.0467 EJ
4 Africa_Southern 0.0467 EJ
5 Africa_Western 0.0467 EJ
6 Australia_NZ 0.0465 EJ
7 Brazil 0.4168 EJ
8 Canada -0.0867 EJ
9 Central America and Caribbean 0.0467 EJ
10 Central Asia -0.0469 EJ
11 China 0.1817 EJ
12 EU-12 0.0565 EJ
13 EU-15 -0.3085 EJ
14 Europe_Eastern 0.0412 EJ
15 Europe_Non_EU -0.0422 EJ
16 European Free Trade Association -0.0399 EJ
17 India 0.0467 EJ
18 Indonesia 0.0467 EJ
19 Japan -0.2100 EJ
20 Mexico -0.0395 EJ
21 Middle East 0.0467 EJ
22 Pakistan 0.0467 EJ
23 Russia 0.1742 EJ
24 South Africa 0.0467 EJ
25 South America_Northern -0.0165 EJ
26 South America_Southern 0.0467 EJ
27 South Asia 0.0467 EJ
28 South Korea -0.0134 EJ
29 Southeast Asia 0.1075 EJ
30 Taiwan -0.0163 EJ
31 Argentina 0.0467 EJ
32 Colombia 0.0117 EJ
write_csv(region_intercepts_final_to_save, paste0(fig_dir, "/EJ_nonstaples_GDP_lm_intercepts_regional.csv"))
write_csv(data.frame(variable = c("nonstaples calories", "GDP"),
                     coefficient = c(round(nonstaples_slope_final, 4), round(GDP_slope_final, 12)),
                     units = c("EJ per PKcal", "EJ per million 1990 USD")),
          paste0(fig_dir, "/EJ_nonstaples_GDP_lm_coefs.csv"))

Results from infilling in gcamdata

Plotting figures showing the original food processing energy use, added energy for some regions and years, and final food processing energy use.

# Recalculate food processing energy use - add the infilled values to food processing energy use
L1328.in_EJ_R_food_F_Yh_recal %>%
  full_join(L1328.EJ_R_food_F_Yh_est_to_infill_adders %>% select(GCAM_region_ID, fuel, year, infill_fuel),
            by = c("GCAM_region_ID", "fuel", "year")) %>%
  mutate(value = replace_na(value, 0),
         infill_fuel = replace_na(infill_fuel, 0),
         new_value = value + infill_fuel) %>%
  left_join(GCAM_region_names) ->
  L1328.in_EJ_R_food_F_Yh_recal_2_full

L1328.in_EJ_R_food_F_Yh_recal_2_full_longer <- L1328.in_EJ_R_food_F_Yh_recal_2_full %>%
  rename(original = value, updated = new_value, infill = infill_fuel) %>%
  pivot_longer(cols = c(original, infill, updated), names_to = "value_type", values_to = "value") %>%
  mutate(value_type = factor(value_type, levels = c("original", "infill", "updated")))

# Calculate the coefficients of food processing energy use per calorie consumed
L1328.in_EJ_R_food_Yh_recal_2_full_longer <- L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>%
  # summarize to total energy
  group_by(GCAM_region_ID, region, year, sector, value_type) %>%
  summarize(value = sum(value)) %>%
  ungroup()

L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs <- L1328.in_EJ_R_food_Yh_recal_2_full_longer %>%
  filter(value_type != "infill") %>%
  rename(EJ = value) %>%
  left_join(L1328.out_Pcal_R_food_Yh %>% rename(Pcal = value)) %>%
  mutate(EJ_per_Pcal = EJ / Pcal,
         EJ_per_PKcal = EJ_per_Pcal * 1000)

# Recalculate other industry energy use - subtract off the infilled values allocated to food processing
L1328.in_EJ_R_indenergy_F_Yh_tmp_2 %>%
  left_join(L1328.EJ_R_food_F_Yh_est_to_infill_adders %>% select(GCAM_region_ID, fuel, year, infill_fuel),
            by = c("GCAM_region_ID", "fuel", "year")) %>%
  mutate(infill_fuel = replace_na(infill_fuel, 0),
         new_value = value - infill_fuel) %>%
  left_join(GCAM_region_names)  ->
  L1328.in_EJ_R_indenergy_F_Yh_full

L1328.in_EJ_R_indenergy_F_Yh_full_longer <- L1328.in_EJ_R_indenergy_F_Yh_full %>%
  rename(original = value, updated = new_value, removed = infill_fuel) %>%
  pivot_longer(cols = c(original, removed, updated), names_to = "value_type", values_to = "value") %>%
  mutate(value_type = factor(value_type, levels = c("original", "removed", "updated")))

# plot
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% filter(year >= 1990),
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use, before and after infilling") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata.png"), fig, width = 15, height = 35)

fig <- ggplot(L1328.in_EJ_R_indenergy_F_Yh_full_longer %>% filter(year >= 1990),
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Other industry energy use, before and after infilling") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/other_industry_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata.png"), fig, width = 15, height = 35)

# plot just 2015 before and after infilling for each region
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% 
                filter(year == 2015),
       aes(x = value_type, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~region, nrow = 8, scales = "free") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use in 2015, before and after infilling") + 
  theme_classic()
fig

ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2015.png"), fig, width = 15, height = 35)

fig <- ggplot(L1328.in_EJ_R_indenergy_F_Yh_full_longer %>% 
                filter(year == 2015),
       aes(x = value_type, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~region, nrow = 8, scales = "free") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Other industry energy use in 2015, before and after infilling") + 
  theme_classic()
fig

ggsave(paste0(fig_dir, "/other_industry_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2015.png"), fig, width = 15, height = 35)

# plot just a few selected regions
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% filter(year >= 1990 & value_type != "infill" & region %in% c("China", "Southeast Asia", "India", "Pakistan")) %>%
                mutate(region = factor(region, levels = c("China", "Southeast Asia", "India", "Pakistan"))),
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use, before and after infilling") + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic() + 
  theme(text = element_text(size = 20))
fig

ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_sel.png"), fig, width = 10, height = 10)

# plot just regions that had infilling happen
regions_sel_infilled <- L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>%
  filter(value_type == "infill" & year >= 1990) %>%
  group_by(region) %>%
  filter(!all(value == 0)) %>%
  ungroup() %>%
  pull(region) %>%
  unique()
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% filter(year >= 1990 & region %in% regions_sel_infilled),
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use, before and after infilling, for regions where infilling is performed") + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic() + 
  theme(text = element_text(size = 25))
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_sel_infilled.png"), fig, width = 20, height = 30)

# plot the energy per calorie coefficients
L1328.in_EJ_R_food_Yh_recal_2_full_coefs <- L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs %>%
  dplyr::select(GCAM_region_ID, region, year, sector, value_type, EJ_per_PKcal) %>%
  pivot_wider(names_from = "value_type", values_from = "EJ_per_PKcal")  %>%
  mutate(Units = "EJ per PKcal")

L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs_sel <- L1328.in_EJ_R_food_Yh_recal_2_full_coefs %>%
  mutate(updated = ifelse(updated == original, NA, updated)) %>%
  pivot_longer(c(original, updated), names_to = "value_type", values_to = "EJ_per_PKcal") %>%
  filter(!is.na(EJ_per_PKcal))

fig <- ggplot(L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs %>%
                filter(year >= 1990),
       aes(x = year, y = EJ_per_PKcal, color = value_type)) +
  geom_point(size = 2) + 
  facet_wrap(~region, nrow = 8) + 
  scale_color_manual(values = c("darkgreen", "mediumpurple"), name = "") + 
  labs(x = "", y = "EJ per PKcal", title = "Food processing energy use coefficients, before and after infilling") + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_coef_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata.png"), fig, width = 15, height = 35)

fig <- ggplot(L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs_sel %>%
                filter(year >= 1990),
       aes(x = year, y = EJ_per_PKcal, color = value_type)) +
  geom_point(size = 2) + 
  facet_wrap(~region, nrow = 8) + 
  scale_color_manual(values = c("skyblue", "mediumpurple"), name = "") + 
  labs(x = "", y = "EJ per PKcal", title = "Food processing energy use coefficients, before and after infilling") + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_coef_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2.png"), fig, width = 15, height = 35)

fig <- ggplot(L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs %>%
                filter(year == 2015) %>%
                mutate(region = factor(region, levels = unique((L1328.in_EJ_R_food_Yh_recal_2_full_coefs %>% filter(year == 2015) %>% mutate(unchanged = original == updated) %>% arrange(desc(unchanged)))$region))),
       aes(x = region, y = EJ_per_PKcal, fill = value_type)) +
  geom_col(position = "dodge") + 
  scale_fill_manual(values = c("#00931d", "deepskyblue1"), name = "") + 
  labs(x = "", y = "EJ per PKcal", title = "Food processing energy use coefficients, before and after infilling, in 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 55, vjust = 1, hjust = 1),
        text = element_text(size = 20))
fig

ggsave(paste0(fig_dir, "/foodpro_energy_use_coef_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2015.png"), fig, width = 12, height = 8)

Calculating relationships using the original FAO data for food consumption

This is done just for comparison, as this was the original methodology. However, for consistency in data–since the coefficient of food processing energy use per calorie will actually be applied to the gcamdata processed food consumption data–the relationships derived using the gcamdata processed food consumption data will be used in the breakout.

Join data and calculate relationships on a country level

First join data.

# first join the food processing energy use data with the food production data
comb_data_all <- IEA_foodpro_region_total_en %>% 
  dplyr::select(iso, country_name, GCAM_region_ID, year, EJ = value) %>%
  full_join(total_cal_food_kt_all_years %>%
              dplyr::select(iso, year, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie, MKcal, calperg_food_w_calorie)) %>%
  full_join(GDP_per_cap_hist_calc_FAO %>%
              dplyr::select(iso, year, GCAM_region_ID, GDP_pcap_thous2015USD_FAO, pop, GDP_mil2015USD_FAO)) %>%
  left_join(total_feed_kt_all_years %>%
              dplyr::select(iso, year, GCAM_region_ID, Feed_Kt)) %>%
  filter(year %in% all_IEA_years & !is.na(EJ)) %>%
  arrange(country_name, year) %>%
  # convert some units
  mutate(PKcal = MKcal / (10^9))

# select just regions that do not have zero food processing energy use in all years, and also do not have no food production in all years
comb_data_sel <- comb_data_all %>%
  group_by(iso) %>%
  filter(!all(EJ == 0) & !all(is.na(MKcal) | MKcal == 0)) %>%
  ungroup()

# repeat for the food data with category of food -- add the food production by category and commodity to the rest of the data
comb_data_all_cats <- comb_data_all %>%
  left_join(total_cal_food_kt_all_years_cats %>%
              # convert some units
              mutate(PKcal = MKcal / (10^9)) %>%
              dplyr::select(iso, year, GCAM_region_ID, commodity_type_broad, PKcal) %>%
              pivot_wider(names_from = commodity_type_broad, values_from = PKcal)) %>%
  mutate(nonstaples = nonstaples_animal + nonstaples_other,
         staples_frac = staples / PKcal,
         nonstaples_frac = nonstaples / PKcal,
         EJ_per_PKcal = EJ / PKcal,
         EJ_pcap = EJ / pop) %>%
  left_join(total_cal_food_kt_all_years_commodity %>%
              # convert some units
              mutate(PKcal = MKcal / (10^9)) %>%
              dplyr::select(iso, year, GCAM_region_ID, GCAM_commodity, PKcal) %>%
              pivot_wider(names_from = GCAM_commodity, values_from = PKcal))

# select just regions that do not have zero food processing energy use in all years, and also do not have no food production in all years
comb_data_sel_cats <- comb_data_all_cats %>%
  group_by(iso) %>%
  filter(!all(EJ == 0) & !all(is.na(MKcal) | MKcal == 0)) %>%
  ungroup()

Now filter data, using the same criteria, and calculate relationships.

IEA_ind_sectors_region_total_en_frac_wider <- IEA_ind_sectors_region_total_en %>%
  dplyr::select(-value) %>%
  pivot_wider(names_from = FLOW, values_from = frac)

# get regions and years to include, satisfying our criteria established earlier
country_years_incl <- IEA_ind_sectors_region_total_en_frac_wider %>%
  # select only desired years in which INONSPEC fraction is not too high and food processing fraction is not too low, or in which food processing fraction is high even if INONSPEC fraction is high
  filter(year >= min_year & ((INONSPEC < max_frac_inonspec & (!is.na(FOODPRO) & FOODPRO > min_frac_foodpro)) | FOODPRO > override_frac_foodpro)) %>%
  dplyr::select(iso, country_name, GCAM_region_ID, year)

fig <- ggplot(IEA_ind_sectors_region_fuel %>% filter(FLOW == "FOODPRO") %>% right_join(country_years_incl), 
       aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~country_name, scale = "free_y") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_continuous(breaks = c(1990, 2000, 2010)) + 
  theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_country_fuel_hist_IEA_sel.png"), fig, width = 35, height = 20)

# filter the food and energy data
comb_data_sel_cats_final <- comb_data_sel_cats %>%
  right_join(country_years_incl) %>%
  filter(!is.na(MKcal),
         MKcal > 0)

# get data to infill
comb_data_all_cats_infill <- comb_data_all_cats %>%
  anti_join(country_years_incl)

# calculate relationships
# just going to infill from the minimum year onwards for now
hist_and_infilled_test_methods_country <- run_models_calc_infill(comb_data_sel_cats_final %>% rename(region = country_name, GDP = GDP_mil2015USD_FAO),
                                                                 comb_data_all_cats_infill %>% filter(year >= min_year) %>% 
                                                                   rename(region = country_name, GDP = GDP_mil2015USD_FAO)) %>%
  rename(country_name = region, GDP_mil2015USD_FAO = GDP)
## 
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74877 -0.00110  0.00436  0.01927  0.80324 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)    
## PKcal  1.03354    0.01518    68.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1239 on 1560 degrees of freedom
## Multiple R-squared:  0.7483, Adjusted R-squared:  0.7482 
## F-statistic:  4638 on 1 and 1560 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71065 -0.02981 -0.02397 -0.00848  0.79290 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.029077   0.003228   9.007   <2e-16 ***
## PKcal       0.988429   0.015625  63.260   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1208 on 1559 degrees of freedom
## Multiple R-squared:  0.7196, Adjusted R-squared:  0.7195 
## F-statistic:  4002 on 1 and 1559 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64540 -0.00386 -0.00007  0.00287  0.35023 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## PKcal                                             2.4655702  0.0702549  35.095
## regionAlbania                                    -0.0072761  0.0139865  -0.520
## regionAlgeria                                    -0.0973940  0.0220438  -4.418
## regionArgentina                                  -0.0063869  0.0577711  -0.111
## regionArmenia                                    -0.0042379  0.0407703  -0.104
## regionAustralia                                   0.0536709  0.0114680   4.680
## regionAustria                                    -0.0136003  0.0113507  -1.198
## regionAzerbaijan                                 -0.0178580  0.0160087  -1.116
## regionBelarus                                     0.0018460  0.0113359   0.163
## regionBelgium                                    -0.0029741  0.0113717  -0.262
## regionBenin                                      -0.0211867  0.0173949  -1.218
## regionBosnia and Herzegovina                     -0.0088659  0.0203869  -0.435
## regionBotswana                                   -0.0032479  0.0135904  -0.239
## regionBrazil                                      0.1504994  0.0189928   7.924
## regionBulgaria                                   -0.0105671  0.0113274  -0.933
## regionCameroon                                   -0.0498435  0.0204354  -2.439
## regionCanada                                     -0.0677119  0.0177510  -3.815
## regionChina                                      -2.3957724  0.1029992 -23.260
## regionColombia                                   -0.0542326  0.0117267  -4.625
## regionCongo                                      -0.0077665  0.0288296  -0.269
## regionCosta Rica                                  0.0035419  0.0113125   0.313
## regionCote dIvoire                               -0.0458530  0.0167130  -2.744
## regionCroatia                                    -0.0030947  0.0113135  -0.274
## regionCyprus                                     -0.0020856  0.0115317  -0.181
## regionCzech Republic                             -0.0074207  0.0123351  -0.602
## regionDenmark                                     0.0105927  0.0113210   0.936
## regionDominican Republic                         -0.0048202  0.0113234  -0.426
## regionEstonia                                     0.0001313  0.0117698   0.011
## regionFinland                                    -0.0016292  0.0113178  -0.144
## regionFrance                                     -0.0521288  0.0133009  -3.919
## regionGeorgia                                    -0.0112524  0.0144185  -0.780
## regionGermany                                    -0.0791826  0.0138329  -5.724
## regionGreece                                     -0.0210734  0.0113762  -1.852
## regionHungary                                    -0.0096319  0.0113469  -0.849
## regionIceland                                     0.0019442  0.0113075   0.172
## regionIreland                                     0.0049661  0.0113155   0.439
## regionIsrael                                     -0.0207735  0.0166597  -1.247
## regionItaly                                      -0.1173068  0.0132587  -8.848
## regionJamaica                                    -0.0057791  0.0407701  -0.142
## regionJapan                                      -0.1219481  0.0149626  -8.150
## regionKazakhstan                                 -0.0200536  0.0192668  -1.041
## regionKorea, Republic of                         -0.0815894  0.0120981  -6.744
## regionKyrgyzstan                                 -0.0121062  0.0332907  -0.364
## regionLatvia                                     -0.0003774  0.0113093  -0.033
## regionLithuania                                  -0.0013741  0.0113113  -0.121
## regionLuxembourg                                 -0.0007518  0.0144143  -0.052
## regionMacedonia, the former Yugoslav Republic of -0.0040456  0.0113087  -0.358
## regionMalta                                      -0.0011922  0.0235384  -0.051
## regionMexico                                     -0.2249034  0.0145355 -15.473
## regionMoldova, Republic of                       -0.0055319  0.0120264  -0.460
## regionMontenegro                                 -0.0017037  0.0173843  -0.098
## regionMorocco                                    -0.0970869  0.0169038  -5.743
## regionMyanmar                                    -0.1215420  0.0577769  -2.104
## regionNetherlands                                 0.0316258  0.0114272   2.768
## regionNew Zealand                                 0.0067845  0.0113147   0.600
## regionNorway                                      0.0001391  0.0113167   0.012
## regionPhilippines                                -0.1486343  0.0130215 -11.415
## regionPoland                                     -0.0452449  0.0119420  -3.789
## regionPortugal                                   -0.0139507  0.0113497  -1.229
## regionRomania                                    -0.0381717  0.0119456  -3.195
## regionRussian Federation                         -0.0756132  0.0171203  -4.417
## regionSenegal                                    -0.0297852  0.0174063  -1.711
## regionSerbia                                     -0.0039329  0.0173981  -0.226
## regionSlovakia                                   -0.0083704  0.0113173  -0.740
## regionSlovenia                                   -0.0031142  0.0113090  -0.275
## regionSpain                                      -0.0451155  0.0119758  -3.767
## regionSudan                                      -0.0732998  0.0175385  -4.179
## regionSweden                                     -0.0113478  0.0113414  -1.001
## regionSwitzerland                                -0.0138847  0.0113357  -1.225
## regionTaiwan                                     -0.0390163  0.0114625  -3.404
## regionTajikistan                                 -0.0145059  0.0139903  -1.037
## regionThailand                                    0.0444830  0.0123877   3.591
## regionTogo                                       -0.0141698  0.0166491  -0.851
## regionTunisia                                    -0.0293290  0.0174113  -1.684
## regionTurkey                                     -0.1892793  0.0132259 -14.311
## regionUkraine                                    -0.0700790  0.0170961  -4.099
## regionUnited Kingdom                             -0.0607865  0.0126442  -4.807
## regionUnited States of America                   -0.1417882  0.0343651  -4.126
## regionUruguay                                     0.0008953  0.0332894   0.027
## regionVenezuela                                  -0.0573085  0.0115010  -4.983
## regionViet Nam                                   -0.1384807  0.0244328  -5.668
## regionZimbabwe                                   -0.0217735  0.0218032  -0.999
##                                                  Pr(>|t|)    
## PKcal                                             < 2e-16 ***
## regionAlbania                                    0.602984    
## regionAlgeria                                    1.07e-05 ***
## regionArgentina                                  0.911983    
## regionArmenia                                    0.917226    
## regionAustralia                                  3.13e-06 ***
## regionAustria                                    0.231034    
## regionAzerbaijan                                 0.264809    
## regionBelarus                                    0.870665    
## regionBelgium                                    0.793718    
## regionBenin                                      0.223425    
## regionBosnia and Herzegovina                     0.663712    
## regionBotswana                                   0.811150    
## regionBrazil                                     4.49e-15 ***
## regionBulgaria                                   0.351035    
## regionCameroon                                   0.014842 *  
## regionCanada                                     0.000142 ***
## regionChina                                       < 2e-16 ***
## regionColombia                                   4.08e-06 ***
## regionCongo                                      0.787664    
## regionCosta Rica                                 0.754249    
## regionCote dIvoire                               0.006151 ** 
## regionCroatia                                    0.784476    
## regionCyprus                                     0.856503    
## regionCzech Republic                             0.547535    
## regionDenmark                                    0.349596    
## regionDominican Republic                         0.670402    
## regionEstonia                                    0.991103    
## regionFinland                                    0.885557    
## regionFrance                                     9.29e-05 ***
## regionGeorgia                                    0.435273    
## regionGermany                                    1.26e-08 ***
## regionGreece                                     0.064165 .  
## regionHungary                                    0.396100    
## regionIceland                                    0.863509    
## regionIreland                                    0.660817    
## regionIsrael                                     0.212619    
## regionItaly                                       < 2e-16 ***
## regionJamaica                                    0.887298    
## regionJapan                                      7.66e-16 ***
## regionKazakhstan                                 0.298120    
## regionKorea, Republic of                         2.20e-11 ***
## regionKyrgyzstan                                 0.716170    
## regionLatvia                                     0.973384    
## regionLithuania                                  0.903328    
## regionLuxembourg                                 0.958412    
## regionMacedonia, the former Yugoslav Republic of 0.720584    
## regionMalta                                      0.959613    
## regionMexico                                      < 2e-16 ***
## regionMoldova, Republic of                       0.645596    
## regionMontenegro                                 0.921945    
## regionMorocco                                    1.12e-08 ***
## regionMyanmar                                    0.035578 *  
## regionNetherlands                                0.005718 ** 
## regionNew Zealand                                0.548853    
## regionNorway                                     0.990197    
## regionPhilippines                                 < 2e-16 ***
## regionPoland                                     0.000158 ***
## regionPortugal                                   0.219202    
## regionRomania                                    0.001426 ** 
## regionRussian Federation                         1.08e-05 ***
## regionSenegal                                    0.087259 .  
## regionSerbia                                     0.821189    
## regionSlovakia                                   0.459653    
## regionSlovenia                                   0.783067    
## regionSpain                                      0.000172 ***
## regionSudan                                      3.09e-05 ***
## regionSweden                                     0.317200    
## regionSwitzerland                                0.220820    
## regionTaiwan                                     0.000682 ***
## regionTajikistan                                 0.299971    
## regionThailand                                   0.000340 ***
## regionTogo                                       0.394861    
## regionTunisia                                    0.092299 .  
## regionTurkey                                      < 2e-16 ***
## regionUkraine                                    4.37e-05 ***
## regionUnited Kingdom                             1.68e-06 ***
## regionUnited States of America                   3.90e-05 ***
## regionUruguay                                    0.978548    
## regionVenezuela                                  7.00e-07 ***
## regionViet Nam                                   1.74e-08 ***
## regionZimbabwe                                   0.318133    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05766 on 1479 degrees of freedom
## Multiple R-squared:  0.9483, Adjusted R-squared:  0.9454 
## F-statistic: 330.9 on 82 and 1479 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64540 -0.00386 -0.00007  0.00287  0.35023 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                      -0.0072761  0.0139865  -0.520
## PKcal                                             2.4655702  0.0702549  35.095
## regionAlgeria                                    -0.0901179  0.0260722  -3.456
## regionArgentina                                   0.0008892  0.0594236   0.015
## regionArmenia                                     0.0030382  0.0431012   0.070
## regionAustralia                                   0.0609470  0.0180584   3.375
## regionAustria                                    -0.0063242  0.0179980  -0.351
## regionAzerbaijan                                 -0.0105819  0.0212484  -0.498
## regionBelarus                                     0.0091221  0.0179914   0.507
## regionBelgium                                     0.0043020  0.0180079   0.239
## regionBenin                                      -0.0139106  0.0223131  -0.623
## regionBosnia and Herzegovina                     -0.0015898  0.0247202  -0.064
## regionBotswana                                    0.0040282  0.0195002   0.207
## regionBrazil                                      0.1577755  0.0234120   6.739
## regionBulgaria                                   -0.0032910  0.0179880  -0.183
## regionCameroon                                   -0.0425674  0.0247478  -1.720
## regionCanada                                     -0.0604358  0.0225563  -2.679
## regionChina                                      -2.3884962  0.1036787 -23.037
## regionColombia                                   -0.0469565  0.0182061  -2.579
## regionCongo                                      -0.0004904  0.0320411  -0.015
## regionCosta Rica                                  0.0108180  0.0179837   0.602
## regionCote dIvoire                               -0.0385769  0.0217745  -1.772
## regionCroatia                                     0.0041814  0.0179838   0.233
## regionCyprus                                      0.0051905  0.0181262   0.286
## regionCzech Republic                             -0.0001446  0.0186339  -0.008
## regionDenmark                                     0.0178688  0.0179858   0.993
## regionDominican Republic                          0.0024559  0.0179866   0.137
## regionEstonia                                     0.0074074  0.0182780   0.405
## regionFinland                                     0.0056469  0.0179848   0.314
## regionFrance                                     -0.0448527  0.0192031  -2.336
## regionGeorgia                                    -0.0039763  0.0200830  -0.198
## regionGermany                                    -0.0719065  0.0195620  -3.676
## regionGreece                                     -0.0137973  0.0180102  -0.766
## regionHungary                                    -0.0023558  0.0179962  -0.131
## regionIceland                                     0.0092203  0.0179852   0.513
## regionIreland                                     0.0122422  0.0179842   0.681
## regionIsrael                                     -0.0134974  0.0217435  -0.621
## regionItaly                                      -0.1100307  0.0191750  -5.738
## regionJamaica                                     0.0014970  0.0431012   0.035
## regionJapan                                      -0.1146719  0.0203523  -5.634
## regionKazakhstan                                 -0.0127775  0.0237928  -0.537
## regionKorea, Republic of                         -0.0743133  0.0184301  -4.032
## regionKyrgyzstan                                 -0.0048301  0.0361064  -0.134
## regionLatvia                                      0.0068987  0.0179836   0.384
## regionLithuania                                   0.0059020  0.0179835   0.328
## regionLuxembourg                                  0.0065243  0.0200840   0.325
## regionMacedonia, the former Yugoslav Republic of  0.0032305  0.0179838   0.180
## regionMalta                                       0.0060839  0.0273798   0.222
## regionMexico                                     -0.2176272  0.0200494 -10.855
## regionMoldova, Republic of                        0.0017442  0.0184414   0.095
## regionMontenegro                                  0.0055724  0.0223115   0.250
## regionMorocco                                    -0.0898107  0.0219036  -4.100
## regionMyanmar                                    -0.1142659  0.0594288  -1.923
## regionNetherlands                                 0.0389019  0.0180364   2.157
## regionNew Zealand                                 0.0140606  0.0179840   0.782
## regionNorway                                      0.0074152  0.0179845   0.412
## regionPhilippines                                -0.1413581  0.0190184  -7.433
## regionPoland                                     -0.0379688  0.0183347  -2.071
## regionPortugal                                   -0.0066746  0.0179975  -0.371
## regionRomania                                    -0.0308956  0.0183634  -1.682
## regionRussian Federation                         -0.0683371  0.0219550  -3.113
## regionSenegal                                    -0.0225091  0.0223188  -1.009
## regionSerbia                                      0.0033432  0.0223146   0.150
## regionSlovakia                                   -0.0010943  0.0179847  -0.061
## regionSlovenia                                    0.0041619  0.0179837   0.231
## regionSpain                                      -0.0378393  0.0183552  -2.062
## regionSudan                                      -0.0660237  0.0224047  -2.947
## regionSweden                                     -0.0040717  0.0179938  -0.226
## regionSwitzerland                                -0.0066086  0.0179913  -0.367
## regionTaiwan                                     -0.0317401  0.0180554  -1.758
## regionTajikistan                                 -0.0072298  0.0197768  -0.366
## regionThailand                                    0.0517591  0.0186104   2.781
## regionTogo                                       -0.0068936  0.0217392  -0.317
## regionTunisia                                    -0.0220529  0.0223215  -0.988
## regionTurkey                                     -0.1820032  0.0191532  -9.502
## regionUkraine                                    -0.0628029  0.0220407  -2.849
## regionUnited Kingdom                             -0.0535103  0.0187736  -2.850
## regionUnited States of America                   -0.1345121  0.0368658  -3.649
## regionUruguay                                     0.0081714  0.0361062   0.226
## regionVenezuela                                  -0.0500324  0.0180766  -2.768
## regionViet Nam                                   -0.1312046  0.0280901  -4.671
## regionZimbabwe                                   -0.0144974  0.0258965  -0.560
##                                                  Pr(>|t|)    
## (Intercept)                                      0.602984    
## PKcal                                             < 2e-16 ***
## regionAlgeria                                    0.000563 ***
## regionArgentina                                  0.988064    
## regionArmenia                                    0.943813    
## regionAustralia                                  0.000757 ***
## regionAustria                                    0.725349    
## regionAzerbaijan                                 0.618552    
## regionBelarus                                    0.612214    
## regionBelgium                                    0.811219    
## regionBenin                                      0.533101    
## regionBosnia and Herzegovina                     0.948731    
## regionBotswana                                   0.836371    
## regionBrazil                                     2.28e-11 ***
## regionBulgaria                                   0.854858    
## regionCameroon                                   0.085633 .  
## regionCanada                                     0.007459 ** 
## regionChina                                       < 2e-16 ***
## regionColombia                                   0.010000 ** 
## regionCongo                                      0.987791    
## regionCosta Rica                                 0.547567    
## regionCote dIvoire                               0.076658 .  
## regionCroatia                                    0.816174    
## regionCyprus                                     0.774647    
## regionCzech Republic                             0.993809    
## regionDenmark                                    0.320630    
## regionDominican Republic                         0.891411    
## regionEstonia                                    0.685343    
## regionFinland                                    0.753580    
## regionFrance                                     0.019640 *  
## regionGeorgia                                    0.843078    
## regionGermany                                    0.000246 ***
## regionGreece                                     0.443748    
## regionHungary                                    0.895870    
## regionIceland                                    0.608264    
## regionIreland                                    0.496156    
## regionIsrael                                     0.534856    
## regionItaly                                      1.16e-08 ***
## regionJamaica                                    0.972298    
## regionJapan                                      2.10e-08 ***
## regionKazakhstan                                 0.591326    
## regionKorea, Republic of                         5.81e-05 ***
## regionKyrgyzstan                                 0.893600    
## regionLatvia                                     0.701322    
## regionLithuania                                  0.742814    
## regionLuxembourg                                 0.745339    
## regionMacedonia, the former Yugoslav Republic of 0.857466    
## regionMalta                                      0.824185    
## regionMexico                                      < 2e-16 ***
## regionMoldova, Republic of                       0.924662    
## regionMontenegro                                 0.802810    
## regionMorocco                                    4.35e-05 ***
## regionMyanmar                                    0.054705 .  
## regionNetherlands                                0.031178 *  
## regionNew Zealand                                0.434435    
## regionNorway                                     0.680172    
## regionPhilippines                                1.79e-13 ***
## regionPoland                                     0.038544 *  
## regionPortugal                                   0.710793    
## regionRomania                                    0.092693 .  
## regionRussian Federation                         0.001890 ** 
## regionSenegal                                    0.313367    
## regionSerbia                                     0.880927    
## regionSlovakia                                   0.951491    
## regionSlovenia                                   0.817015    
## regionSpain                                      0.039429 *  
## regionSudan                                      0.003260 ** 
## regionSweden                                     0.821012    
## regionSwitzerland                                0.713433    
## regionTaiwan                                     0.078966 .  
## regionTajikistan                                 0.714737    
## regionThailand                                   0.005485 ** 
## regionTogo                                       0.751208    
## regionTunisia                                    0.323332    
## regionTurkey                                      < 2e-16 ***
## regionUkraine                                    0.004441 ** 
## regionUnited Kingdom                             0.004428 ** 
## regionUnited States of America                   0.000273 ***
## regionUruguay                                    0.820987    
## regionVenezuela                                  0.005714 ** 
## regionViet Nam                                   3.27e-06 ***
## regionZimbabwe                                   0.575686    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05766 on 1479 degrees of freedom
## Multiple R-squared:  0.9394, Adjusted R-squared:  0.9361 
## F-statistic: 283.2 on 81 and 1479 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55185 -0.01705 -0.01003 -0.00088  0.74076 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.060e-02  2.439e-03   4.349 1.46e-05 ***
## PKcal       7.114e-01  1.342e-02  53.012  < 2e-16 ***
## GDP         4.933e-08  1.287e-09  38.326  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08747 on 1499 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.8581, Adjusted R-squared:  0.8579 
## F-statistic:  4532 on 2 and 1499 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37483 -0.00362 -0.00014  0.00340  0.26306 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                      -2.350e-03  1.078e-02  -0.218
## PKcal                                             9.769e-01  7.121e-02  13.717
## GDP                                               8.919e-08  2.751e-09  32.423
## regionAlgeria                                    -3.804e-02  2.016e-02  -1.887
## regionArgentina                                   1.511e-02  4.581e-02   0.330
## regionArmenia                                     2.055e-03  3.323e-02   0.062
## regionAustralia                                   1.788e-02  1.398e-02   1.279
## regionAustria                                    -1.861e-02  1.388e-02  -1.341
## regionAzerbaijan                                 -3.225e-03  1.638e-02  -0.197
## regionBelarus                                     1.676e-02  1.410e-02   1.189
## regionBelgium                                    -5.641e-03  1.551e-02  -0.364
## regionBenin                                      -6.494e-03  1.720e-02  -0.377
## regionBosnia and Herzegovina                     -1.654e-03  1.906e-02  -0.087
## regionBotswana                                    5.382e-04  1.503e-02   0.036
## regionBrazil                                      3.557e-01  1.908e-02  18.645
## regionBulgaria                                    2.495e-03  1.387e-02   0.180
## regionCameroon                                   -1.924e-02  1.909e-02  -1.008
## regionCanada                                     -1.163e-01  1.747e-02  -6.654
## regionChina                                      -6.228e-01  9.699e-02  -6.421
## regionColombia                                   -2.667e-03  1.410e-02  -0.189
## regionCongo                                      -1.029e-03  2.470e-02  -0.042
## regionCosta Rica                                  9.833e-03  1.386e-02   0.709
## regionCote dIvoire                               -1.394e-02  1.680e-02  -0.830
## regionCroatia                                     3.167e-03  1.409e-02   0.225
## regionCyprus                                     -1.234e-04  1.397e-02  -0.009
## regionCzech Republic                              3.409e-03  1.437e-02   0.237
## regionDenmark                                     1.428e-03  1.387e-02   0.103
## regionDominican Republic                          6.655e-03  1.387e-02   0.480
## regionEstonia                                     3.515e-03  1.409e-02   0.249
## regionFinland                                    -6.809e-03  1.387e-02  -0.491
## regionFrance                                     -7.975e-02  1.484e-02  -5.373
## regionGeorgia                                    -2.359e-03  1.548e-02  -0.152
## regionGermany                                    -1.641e-01  1.534e-02 -10.695
## regionGreece                                     -1.064e-02  1.388e-02  -0.766
## regionHungary                                     3.825e-03  1.387e-02   0.276
## regionIceland                                     3.710e-03  1.387e-02   0.268
## regionIreland                                     1.448e-03  1.387e-02   0.104
## regionIsrael                                     -2.226e-02  1.676e-02  -1.328
## regionItaly                                      -1.288e-01  1.479e-02  -8.704
## regionJamaica                                    -3.381e-04  3.323e-02  -0.010
## regionJapan                                      -2.699e-01  1.640e-02 -16.461
## regionKazakhstan                                 -2.529e-03  1.834e-02  -0.138
## regionKorea, Republic of                         -6.956e-02  1.421e-02  -4.895
## regionKyrgyzstan                                 -1.813e-03  2.783e-02  -0.065
## regionLatvia                                      3.817e-03  1.409e-02   0.271
## regionLithuania                                   3.460e-03  1.409e-02   0.246
## regionLuxembourg                                 -1.948e-03  1.549e-02  -0.126
## regionMacedonia, the former Yugoslav Republic of  1.242e-03  1.409e-02   0.088
## regionMalta                                       1.139e-03  2.111e-02   0.054
## regionMexico                                     -1.071e-01  1.584e-02  -6.761
## regionMoldova, Republic of                        2.987e-03  1.435e-02   0.208
## regionMontenegro                                  1.639e-03  1.771e-02   0.092
## regionMorocco                                    -3.927e-02  1.696e-02  -2.316
## regionMyanmar                                    -4.591e-02  4.586e-02  -1.001
## regionNetherlands                                 1.195e-02  1.393e-02   0.858
## regionNew Zealand                                 6.262e-03  1.387e-02   0.452
## regionNorway                                     -1.541e-02  1.388e-02  -1.110
## regionPhilippines                                -2.506e-02  1.510e-02  -1.660
## regionPoland                                      1.040e-02  1.421e-02   0.732
## regionPortugal                                   -7.444e-03  1.387e-02  -0.537
## regionRomania                                    -3.930e-03  1.418e-02  -0.277
## regionRussian Federation                          1.045e-01  1.776e-02   5.883
## regionSenegal                                    -1.016e-02  1.721e-02  -0.590
## regionSerbia                                      1.054e-02  1.772e-02   0.595
## regionSlovakia                                   -1.435e-03  1.422e-02  -0.101
## regionSlovenia                                   -4.230e-07  1.409e-02   0.000
## regionSpain                                      -4.876e-02  1.415e-02  -3.445
## regionSudan                                      -3.166e-02  2.007e-02  -1.577
## regionSweden                                     -2.469e-02  1.389e-02  -1.778
## regionSwitzerland                                -4.477e-02  1.392e-02  -3.216
## regionTajikistan                                 -3.645e-03  1.525e-02  -0.239
## regionThailand                                    1.300e-01  1.455e-02   8.932
## regionTogo                                       -3.526e-03  1.676e-02  -0.210
## regionTunisia                                    -9.943e-03  1.721e-02  -0.578
## regionTurkey                                     -8.618e-02  1.506e-02  -5.721
## regionUkraine                                     5.628e-03  1.712e-02   0.329
## regionUnited Kingdom                             -1.496e-01  1.477e-02 -10.129
## regionUnited States of America                   -7.145e-01  3.353e-02 -21.308
## regionUruguay                                     4.154e-03  2.783e-02   0.149
## regionVenezuela                                  -3.613e-02  1.394e-02  -2.591
## regionViet Nam                                   -1.220e-02  2.197e-02  -0.555
## regionZimbabwe                                   -6.372e-03  1.997e-02  -0.319
##                                                  Pr(>|t|)    
## (Intercept)                                      0.827539    
## PKcal                                             < 2e-16 ***
## GDP                                               < 2e-16 ***
## regionAlgeria                                    0.059429 .  
## regionArgentina                                  0.741631    
## regionArmenia                                    0.950686    
## regionAustralia                                  0.201206    
## regionAustria                                    0.180271    
## regionAzerbaijan                                 0.843975    
## regionBelarus                                    0.234646    
## regionBelgium                                    0.716085    
## regionBenin                                      0.705862    
## regionBosnia and Herzegovina                     0.930853    
## regionBotswana                                   0.971449    
## regionBrazil                                      < 2e-16 ***
## regionBulgaria                                   0.857248    
## regionCameroon                                   0.313776    
## regionCanada                                     4.06e-11 ***
## regionChina                                      1.85e-10 ***
## regionColombia                                   0.850054    
## regionCongo                                      0.966766    
## regionCosta Rica                                 0.478263    
## regionCote dIvoire                               0.406893    
## regionCroatia                                    0.822212    
## regionCyprus                                     0.992954    
## regionCzech Republic                             0.812468    
## regionDenmark                                    0.918041    
## regionDominican Republic                         0.631376    
## regionEstonia                                    0.803026    
## regionFinland                                    0.623561    
## regionFrance                                     9.03e-08 ***
## regionGeorgia                                    0.878907    
## regionGermany                                     < 2e-16 ***
## regionGreece                                     0.443678    
## regionHungary                                    0.782845    
## regionIceland                                    0.789086    
## regionIreland                                    0.916843    
## regionIsrael                                     0.184449    
## regionItaly                                       < 2e-16 ***
## regionJamaica                                    0.991883    
## regionJapan                                       < 2e-16 ***
## regionKazakhstan                                 0.890357    
## regionKorea, Republic of                         1.10e-06 ***
## regionKyrgyzstan                                 0.948087    
## regionLatvia                                     0.786489    
## regionLithuania                                  0.806039    
## regionLuxembourg                                 0.899917    
## regionMacedonia, the former Yugoslav Republic of 0.929767    
## regionMalta                                      0.956983    
## regionMexico                                     2.00e-11 ***
## regionMoldova, Republic of                       0.835173    
## regionMontenegro                                 0.926316    
## regionMorocco                                    0.020710 *  
## regionMyanmar                                    0.316975    
## regionNetherlands                                0.391236    
## regionNew Zealand                                0.651602    
## regionNorway                                     0.267231    
## regionPhilippines                                0.097194 .  
## regionPoland                                     0.464334    
## regionPortugal                                   0.591687    
## regionRomania                                    0.781724    
## regionRussian Federation                         5.02e-09 ***
## regionSenegal                                    0.555037    
## regionSerbia                                     0.552094    
## regionSlovakia                                   0.919602    
## regionSlovenia                                   0.999976    
## regionSpain                                      0.000588 ***
## regionSudan                                      0.114958    
## regionSweden                                     0.075575 .  
## regionSwitzerland                                0.001328 ** 
## regionTajikistan                                 0.811070    
## regionThailand                                    < 2e-16 ***
## regionTogo                                       0.833412    
## regionTunisia                                    0.563588    
## regionTurkey                                     1.29e-08 ***
## regionUkraine                                    0.742473    
## regionUnited Kingdom                              < 2e-16 ***
## regionUnited States of America                    < 2e-16 ***
## regionUruguay                                    0.881379    
## regionVenezuela                                  0.009663 ** 
## regionViet Nam                                   0.578749    
## regionZimbabwe                                   0.749665    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04445 on 1420 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.9653, Adjusted R-squared:  0.9633 
## F-statistic: 487.5 on 81 and 1420 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58086 -0.00977 -0.00108  0.00567  0.61242 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## nonstaples  2.60310    0.02297   113.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08127 on 1560 degrees of freedom
## Multiple R-squared:  0.8917, Adjusted R-squared:  0.8916 
## F-statistic: 1.284e+04 on 1 and 1560 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57185 -0.01617 -0.00830 -0.00122  0.60981 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.007335   0.002215   3.311 0.000951 ***
## nonstaples  2.572089   0.024741 103.962  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08101 on 1559 degrees of freedom
## Multiple R-squared:  0.8739, Adjusted R-squared:  0.8739 
## F-statistic: 1.081e+04 on 1 and 1559 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65301 -0.00331 -0.00010  0.00261  0.33872 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## nonstaples                                        2.6046215  0.0644683  40.402
## regionAlbania                                    -0.0032397  0.0130532  -0.248
## regionAlgeria                                    -0.0391350  0.0203919  -1.919
## regionArgentina                                   0.0378308  0.0538561   0.702
## regionArmenia                                    -0.0011714  0.0380544  -0.031
## regionAustralia                                   0.0688185  0.0106324   6.473
## regionAustria                                    -0.0070429  0.0105774  -0.666
## regionAzerbaijan                                 -0.0001723  0.0149276  -0.012
## regionBelarus                                     0.0127213  0.0105630   1.204
## regionBelgium                                     0.0077959  0.0105832   0.737
## regionBenin                                      -0.0063685  0.0162272  -0.392
## regionBosnia and Herzegovina                     -0.0036305  0.0190275  -0.191
## regionBotswana                                   -0.0013702  0.0126848  -0.108
## regionBrazil                                      0.3528411  0.0133944  26.342
## regionBulgaria                                   -0.0004413  0.0105596  -0.042
## regionCameroon                                   -0.0229165  0.0190360  -1.204
## regionCanada                                     -0.0346208  0.0163885  -2.113
## regionChina                                      -0.1753702  0.0355725  -4.930
## regionColombia                                   -0.0132803  0.0106881  -1.243
## regionCongo                                      -0.0021041  0.0269085  -0.078
## regionCosta Rica                                  0.0076812  0.0105560   0.728
## regionCote dIvoire                               -0.0091306  0.0155409  -0.588
## regionCroatia                                     0.0017367  0.0105562   0.165
## regionCyprus                                     -0.0012441  0.0107634  -0.116
## regionCzech Republic                              0.0027686  0.0114914   0.241
## regionDenmark                                     0.0154988  0.0105604   1.468
## regionDominican Republic                          0.0023519  0.0105600   0.223
## regionEstonia                                     0.0015060  0.0109855   0.137
## regionFinland                                     0.0025947  0.0105590   0.246
## regionFrance                                      0.0054369  0.0115370   0.471
## regionGeorgia                                    -0.0040176  0.0134548  -0.299
## regionGermany                                    -0.0118244  0.0117898  -1.003
## regionGreece                                     -0.0079928  0.0105817  -0.755
## regionHungary                                    -0.0005342  0.0105711  -0.051
## regionIceland                                     0.0021346  0.0105543   0.202
## regionIreland                                     0.0096378  0.0105574   0.913
## regionIsrael                                     -0.0124073  0.0155412  -0.798
## regionItaly                                      -0.0428042  0.0113482  -3.772
## regionJamaica                                    -0.0031538  0.0380543  -0.083
## regionJapan                                       0.0377823  0.0114966   3.286
## regionKazakhstan                                 -0.0022879  0.0179541  -0.127
## regionKorea, Republic of                         -0.0062954  0.0107193  -0.587
## regionKyrgyzstan                                 -0.0045335  0.0310715  -0.146
## regionLatvia                                      0.0019760  0.0105550   0.187
## regionLithuania                                   0.0030136  0.0105554   0.286
## regionLuxembourg                                 -0.0003160  0.0134542  -0.023
## regionMacedonia, the former Yugoslav Republic of -0.0014692  0.0105546  -0.139
## regionMalta                                      -0.0007605  0.0219706  -0.035
## regionMexico                                     -0.0805949  0.0114202  -7.057
## regionMoldova, Republic of                        0.0005263  0.0112222   0.047
## regionMontenegro                                 -0.0010187  0.0162264  -0.063
## regionMorocco                                    -0.0332653  0.0155667  -2.137
## regionMyanmar                                    -0.0600785  0.0538439  -1.116
## regionNetherlands                                 0.0420697  0.0106195   3.962
## regionNew Zealand                                 0.0100785  0.0105577   0.955
## regionNorway                                      0.0045596  0.0105582   0.432
## regionPhilippines                                -0.0103961  0.0107787  -0.965
## regionPoland                                      0.0006177  0.0107814   0.057
## regionPortugal                                   -0.0021505  0.0105690  -0.203
## regionRomania                                    -0.0033850  0.0110234  -0.307
## regionRussian Federation                          0.1215281  0.0124793   9.738
## regionSenegal                                    -0.0107542  0.0162289  -0.663
## regionSerbia                                      0.0057513  0.0162304   0.354
## regionSlovakia                                   -0.0027358  0.0105578  -0.259
## regionSlovenia                                   -0.0007592  0.0105548  -0.072
## regionSpain                                      -0.0077251  0.0108466  -0.712
## regionSudan                                      -0.0335572  0.0162592  -2.064
## regionSweden                                     -0.0045831  0.0105710  -0.434
## regionSwitzerland                                -0.0081388  0.0105687  -0.770
## regionTaiwan                                     -0.0173315  0.0106110  -1.633
## regionTajikistan                                 -0.0049136  0.0130531  -0.376
## regionThailand                                    0.1454610  0.0107231  13.565
## regionTogo                                       -0.0039909  0.0155359  -0.257
## regionTunisia                                    -0.0118327  0.0162315  -0.729
## regionTurkey                                     -0.0625871  0.0109254  -5.729
## regionUkraine                                    -0.0107032  0.0156540  -0.684
## regionUnited Kingdom                             -0.0024650  0.0111106  -0.222
## regionUnited States of America                    0.1265614  0.0239929   5.275
## regionUruguay                                     0.0045713  0.0310715   0.147
## regionVenezuela                                  -0.0283499  0.0106124  -2.671
## regionViet Nam                                   -0.0039285  0.0220969  -0.178
## regionZimbabwe                                   -0.0078505  0.0203424  -0.386
##                                                  Pr(>|t|)    
## nonstaples                                        < 2e-16 ***
## regionAlbania                                    0.804018    
## regionAlgeria                                    0.055158 .  
## regionArgentina                                  0.482513    
## regionArmenia                                    0.975448    
## regionAustralia                                  1.31e-10 ***
## regionAustria                                    0.505611    
## regionAzerbaijan                                 0.990793    
## regionBelarus                                    0.228654    
## regionBelgium                                    0.461464    
## regionBenin                                      0.694777    
## regionBosnia and Herzegovina                     0.848707    
## regionBotswana                                   0.913998    
## regionBrazil                                      < 2e-16 ***
## regionBulgaria                                   0.966668    
## regionCameroon                                   0.228839    
## regionCanada                                     0.034810 *  
## regionChina                                      9.15e-07 ***
## regionColombia                                   0.214236    
## regionCongo                                      0.937685    
## regionCosta Rica                                 0.466939    
## regionCote dIvoire                               0.556945    
## regionCroatia                                    0.869344    
## regionCyprus                                     0.908000    
## regionCzech Republic                             0.809648    
## regionDenmark                                    0.142417    
## regionDominican Republic                         0.823783    
## regionEstonia                                    0.890979    
## regionFinland                                    0.805926    
## regionFrance                                     0.637525    
## regionGeorgia                                    0.765284    
## regionGermany                                    0.316057    
## regionGreece                                     0.450164    
## regionHungary                                    0.959705    
## regionIceland                                    0.839750    
## regionIreland                                    0.361446    
## regionIsrael                                     0.424797    
## regionItaly                                      0.000168 ***
## regionJamaica                                    0.933961    
## regionJapan                                      0.001039 ** 
## regionKazakhstan                                 0.898617    
## regionKorea, Republic of                         0.557091    
## regionKyrgyzstan                                 0.884016    
## regionLatvia                                     0.851521    
## regionLithuania                                  0.775299    
## regionLuxembourg                                 0.981267    
## regionMacedonia, the former Yugoslav Republic of 0.889311    
## regionMalta                                      0.972393    
## regionMexico                                     2.60e-12 ***
## regionMoldova, Republic of                       0.962603    
## regionMontenegro                                 0.949950    
## regionMorocco                                    0.032765 *  
## regionMyanmar                                    0.264693    
## regionNetherlands                                7.80e-05 ***
## regionNew Zealand                                0.339933    
## regionNorway                                     0.665913    
## regionPhilippines                                0.334950    
## regionPoland                                     0.954317    
## regionPortugal                                   0.838795    
## regionRomania                                    0.758828    
## regionRussian Federation                          < 2e-16 ***
## regionSenegal                                    0.507655    
## regionSerbia                                     0.723124    
## regionSlovakia                                   0.795573    
## regionSlovenia                                   0.942668    
## regionSpain                                      0.476445    
## regionSudan                                      0.039202 *  
## regionSweden                                     0.664679    
## regionSwitzerland                                0.441376    
## regionTaiwan                                     0.102609    
## regionTajikistan                                 0.706651    
## regionThailand                                    < 2e-16 ***
## regionTogo                                       0.797307    
## regionTunisia                                    0.466121    
## regionTurkey                                     1.23e-08 ***
## regionUkraine                                    0.494252    
## regionUnited Kingdom                             0.824454    
## regionUnited States of America                   1.53e-07 ***
## regionUruguay                                    0.883055    
## regionVenezuela                                  0.007637 ** 
## regionViet Nam                                   0.858916    
## regionZimbabwe                                   0.699613    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05382 on 1479 degrees of freedom
## Multiple R-squared:  0.955,  Adjusted R-squared:  0.9525 
## F-statistic: 382.5 on 82 and 1479 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65301 -0.00331 -0.00010  0.00261  0.33872 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                      -3.240e-03  1.305e-02  -0.248
## nonstaples                                        2.605e+00  6.447e-02  40.402
## regionAlgeria                                    -3.590e-02  2.420e-02  -1.483
## regionArgentina                                   4.107e-02  5.541e-02   0.741
## regionArmenia                                     2.068e-03  4.023e-02   0.051
## regionAustralia                                   7.206e-02  1.683e-02   4.283
## regionAustria                                    -3.803e-03  1.680e-02  -0.226
## regionAzerbaijan                                  3.067e-03  1.983e-02   0.155
## regionBelarus                                     1.596e-02  1.679e-02   0.951
## regionBelgium                                     1.104e-02  1.680e-02   0.657
## regionBenin                                      -3.129e-03  2.082e-02  -0.150
## regionBosnia and Herzegovina                     -3.907e-04  2.307e-02  -0.017
## regionBotswana                                    1.870e-03  1.820e-02   0.103
## regionBrazil                                      3.561e-01  1.864e-02  19.099
## regionBulgaria                                    2.798e-03  1.679e-02   0.167
## regionCameroon                                   -1.968e-02  2.308e-02  -0.853
## regionCanada                                     -3.138e-02  2.094e-02  -1.499
## regionChina                                      -1.721e-01  3.777e-02  -4.557
## regionColombia                                   -1.004e-02  1.686e-02  -0.596
## regionCongo                                       1.136e-03  2.991e-02   0.038
## regionCosta Rica                                  1.092e-02  1.679e-02   0.651
## regionCote dIvoire                               -5.891e-03  2.029e-02  -0.290
## regionCroatia                                     4.976e-03  1.679e-02   0.296
## regionCyprus                                      1.996e-03  1.692e-02   0.118
## regionCzech Republic                              6.008e-03  1.739e-02   0.346
## regionDenmark                                     1.874e-02  1.679e-02   1.116
## regionDominican Republic                          5.592e-03  1.679e-02   0.333
## regionEstonia                                     4.746e-03  1.706e-02   0.278
## regionFinland                                     5.834e-03  1.679e-02   0.348
## regionFrance                                      8.677e-03  1.738e-02   0.499
## regionGeorgia                                    -7.779e-04  1.875e-02  -0.041
## regionGermany                                    -8.585e-03  1.755e-02  -0.489
## regionGreece                                     -4.753e-03  1.680e-02  -0.283
## regionHungary                                     2.706e-03  1.679e-02   0.161
## regionIceland                                     5.374e-03  1.679e-02   0.320
## regionIreland                                     1.288e-02  1.679e-02   0.767
## regionIsrael                                     -9.168e-03  2.029e-02  -0.452
## regionItaly                                      -3.956e-02  1.726e-02  -2.292
## regionJamaica                                     8.591e-05  4.023e-02   0.002
## regionJapan                                       4.102e-02  1.736e-02   2.363
## regionKazakhstan                                  9.518e-04  2.219e-02   0.043
## regionKorea, Republic of                         -3.056e-03  1.688e-02  -0.181
## regionKyrgyzstan                                 -1.294e-03  3.370e-02  -0.038
## regionLatvia                                      5.216e-03  1.679e-02   0.311
## regionLithuania                                   6.253e-03  1.679e-02   0.373
## regionLuxembourg                                  2.924e-03  1.875e-02   0.156
## regionMacedonia, the former Yugoslav Republic of  1.771e-03  1.679e-02   0.105
## regionMalta                                       2.479e-03  2.556e-02   0.097
## regionMexico                                     -7.736e-02  1.731e-02  -4.469
## regionMoldova, Republic of                        3.766e-03  1.721e-02   0.219
## regionMontenegro                                  2.221e-03  2.082e-02   0.107
## regionMorocco                                    -3.003e-02  2.031e-02  -1.478
## regionMyanmar                                    -5.684e-02  5.540e-02  -1.026
## regionNetherlands                                 4.531e-02  1.682e-02   2.694
## regionNew Zealand                                 1.332e-02  1.679e-02   0.793
## regionNorway                                      7.799e-03  1.679e-02   0.465
## regionPhilippines                                -7.156e-03  1.691e-02  -0.423
## regionPoland                                      3.857e-03  1.691e-02   0.228
## regionPortugal                                    1.089e-03  1.679e-02   0.065
## regionRomania                                    -1.453e-04  1.708e-02  -0.009
## regionRussian Federation                          1.248e-01  1.801e-02   6.926
## regionSenegal                                    -7.514e-03  2.083e-02  -0.361
## regionSerbia                                      8.991e-03  2.083e-02   0.432
## regionSlovakia                                    5.039e-04  1.679e-02   0.030
## regionSlovenia                                    2.481e-03  1.679e-02   0.148
## regionSpain                                      -4.485e-03  1.695e-02  -0.265
## regionSudan                                      -3.032e-02  2.084e-02  -1.455
## regionSweden                                     -1.343e-03  1.679e-02  -0.080
## regionSwitzerland                                -4.899e-03  1.679e-02  -0.292
## regionTaiwan                                     -1.409e-02  1.681e-02  -0.838
## regionTajikistan                                 -1.674e-03  1.846e-02  -0.091
## regionThailand                                    1.487e-01  1.688e-02   8.810
## regionTogo                                       -7.511e-04  2.029e-02  -0.037
## regionTunisia                                    -8.593e-03  2.083e-02  -0.413
## regionTurkey                                     -5.935e-02  1.700e-02  -3.491
## regionUkraine                                    -7.463e-03  2.037e-02  -0.366
## regionUnited Kingdom                              7.747e-04  1.711e-02   0.045
## regionUnited States of America                    1.298e-01  2.721e-02   4.771
## regionUruguay                                     7.811e-03  3.370e-02   0.232
## regionVenezuela                                  -2.511e-02  1.681e-02  -1.493
## regionViet Nam                                   -6.888e-04  2.565e-02  -0.027
## regionZimbabwe                                   -4.611e-03  2.417e-02  -0.191
##                                                  Pr(>|t|)    
## (Intercept)                                      0.804018    
## nonstaples                                        < 2e-16 ***
## regionAlgeria                                    0.138277    
## regionArgentina                                  0.458686    
## regionArmenia                                    0.959004    
## regionAustralia                                  1.96e-05 ***
## regionAustria                                    0.820886    
## regionAzerbaijan                                 0.877079    
## regionBelarus                                    0.341899    
## regionBelgium                                    0.511312    
## regionBenin                                      0.880593    
## regionBosnia and Herzegovina                     0.986491    
## regionBotswana                                   0.918201    
## regionBrazil                                      < 2e-16 ***
## regionBulgaria                                   0.867628    
## regionCameroon                                   0.394009    
## regionCanada                                     0.134127    
## regionChina                                      5.61e-06 ***
## regionColombia                                   0.551517    
## regionCongo                                      0.969714    
## regionCosta Rica                                 0.515405    
## regionCote dIvoire                               0.771633    
## regionCroatia                                    0.766916    
## regionCyprus                                     0.906115    
## regionCzech Republic                             0.729704    
## regionDenmark                                    0.264502    
## regionDominican Republic                         0.739111    
## regionEstonia                                    0.780916    
## regionFinland                                    0.728220    
## regionFrance                                     0.617790    
## regionGeorgia                                    0.966903    
## regionGermany                                    0.624788    
## regionGreece                                     0.777243    
## regionHungary                                    0.872021    
## regionIceland                                    0.748888    
## regionIreland                                    0.443114    
## regionIsrael                                     0.651507    
## regionItaly                                      0.022061 *  
## regionJamaica                                    0.998296    
## regionJapan                                      0.018248 *  
## regionKazakhstan                                 0.965796    
## regionKorea, Republic of                         0.856335    
## regionKyrgyzstan                                 0.969382    
## regionLatvia                                     0.756054    
## regionLithuania                                  0.709546    
## regionLuxembourg                                 0.876076    
## regionMacedonia, the former Yugoslav Republic of 0.916012    
## regionMalta                                      0.922728    
## regionMexico                                     8.46e-06 ***
## regionMoldova, Republic of                       0.826846    
## regionMontenegro                                 0.915078    
## regionMorocco                                    0.139497    
## regionMyanmar                                    0.305068    
## regionNetherlands                                0.007137 ** 
## regionNew Zealand                                0.427672    
## regionNorway                                     0.642272    
## regionPhilippines                                0.672224    
## regionPoland                                     0.819613    
## regionPortugal                                   0.948286    
## regionRomania                                    0.993213    
## regionRussian Federation                         6.44e-12 ***
## regionSenegal                                    0.718274    
## regionSerbia                                     0.666003    
## regionSlovakia                                   0.976055    
## regionSlovenia                                   0.882540    
## regionSpain                                      0.791358    
## regionSudan                                      0.146019    
## regionSweden                                     0.936249    
## regionSwitzerland                                0.770506    
## regionTaiwan                                     0.402091    
## regionTajikistan                                 0.927759    
## regionThailand                                    < 2e-16 ***
## regionTogo                                       0.970476    
## regionTunisia                                    0.679960    
## regionTurkey                                     0.000495 ***
## regionUkraine                                    0.714118    
## regionUnited Kingdom                             0.963899    
## regionUnited States of America                   2.02e-06 ***
## regionUruguay                                    0.816746    
## regionVenezuela                                  0.135544    
## regionViet Nam                                   0.978582    
## regionZimbabwe                                   0.848730    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05382 on 1479 degrees of freedom
## Multiple R-squared:  0.9472, Adjusted R-squared:  0.9443 
## F-statistic: 327.7 on 81 and 1479 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62000 -0.01527 -0.00614 -0.00004  0.63106 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.123e-03  2.177e-03   2.353   0.0188 *  
## nonstaples  2.212e+00  3.512e-02  62.976   <2e-16 ***
## GDP         1.979e-08  1.426e-09  13.882   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07767 on 1499 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.8881, Adjusted R-squared:  0.8879 
## F-statistic:  5948 on 2 and 1499 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39874 -0.00356 -0.00008  0.00318  0.25814 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                      -8.247e-04  1.072e-02  -0.077
## nonstaples                                        1.095e+00  7.642e-02  14.334
## GDP                                               8.263e-08  3.004e-09  27.510
## regionAlgeria                                    -1.692e-02  1.990e-02  -0.850
## regionArgentina                                   3.332e-02  4.552e-02   0.732
## regionArmenia                                     1.691e-03  3.305e-02   0.051
## regionAustralia                                   2.687e-02  1.392e-02   1.931
## regionAustria                                    -1.614e-02  1.380e-02  -1.169
## regionAzerbaijan                                  2.312e-03  1.629e-02   0.142
## regionBelarus                                     1.943e-02  1.402e-02   1.386
## regionBelgium                                    -7.352e-04  1.542e-02  -0.048
## regionBenin                                      -2.266e-03  1.711e-02  -0.132
## regionBosnia and Herzegovina                     -1.128e-03  1.896e-02  -0.060
## regionBotswana                                   -2.227e-04  1.495e-02  -0.015
## regionBrazil                                      4.351e-01  1.559e-02  27.911
## regionBulgaria                                    4.914e-03  1.379e-02   0.356
## regionCameroon                                   -1.051e-02  1.896e-02  -0.554
## regionCanada                                     -9.761e-02  1.737e-02  -5.620
## regionChina                                       2.514e-01  3.469e-02   7.245
## regionColombia                                    1.160e-02  1.387e-02   0.836
## regionCongo                                      -3.136e-04  2.457e-02  -0.013
## regionCosta Rica                                  9.996e-03  1.379e-02   0.725
## regionCote dIvoire                               -1.129e-03  1.667e-02  -0.068
## regionCroatia                                     3.558e-03  1.402e-02   0.254
## regionCyprus                                     -1.211e-03  1.390e-02  -0.087
## regionCzech Republic                              6.266e-03  1.428e-02   0.439
## regionDenmark                                     3.201e-03  1.380e-02   0.232
## regionDominican Republic                          7.898e-03  1.379e-02   0.573
## regionEstonia                                     2.577e-03  1.402e-02   0.184
## regionFinland                                    -5.665e-03  1.380e-02  -0.411
## regionFrance                                     -4.994e-02  1.444e-02  -3.459
## regionGeorgia                                    -1.076e-03  1.540e-02  -0.070
## regionGermany                                    -1.253e-01  1.502e-02  -8.338
## regionGreece                                     -6.379e-03  1.380e-02  -0.462
## regionHungary                                     5.974e-03  1.380e-02   0.433
## regionIceland                                     2.327e-03  1.379e-02   0.169
## regionIreland                                     2.619e-03  1.380e-02   0.190
## regionIsrael                                     -1.948e-02  1.668e-02  -1.168
## regionItaly                                      -9.307e-02  1.431e-02  -6.502
## regionJamaica                                    -8.432e-04  3.305e-02  -0.026
## regionJapan                                      -1.863e-01  1.648e-02 -11.309
## regionKazakhstan                                  3.255e-03  1.823e-02   0.179
## regionKorea, Republic of                         -3.710e-02  1.392e-02  -2.666
## regionKyrgyzstan                                 -4.542e-04  2.769e-02  -0.016
## regionLatvia                                      3.229e-03  1.402e-02   0.230
## regionLithuania                                   3.723e-03  1.402e-02   0.266
## regionLuxembourg                                 -2.996e-03  1.540e-02  -0.195
## regionMacedonia, the former Yugoslav Republic of  6.465e-04  1.402e-02   0.046
## regionMalta                                      -1.752e-04  2.099e-02  -0.008
## regionMexico                                     -4.998e-02  1.426e-02  -3.505
## regionMoldova, Republic of                        3.817e-03  1.428e-02   0.267
## regionMontenegro                                  3.819e-04  1.762e-02   0.022
## regionMorocco                                    -1.596e-02  1.669e-02  -0.956
## regionMyanmar                                    -2.436e-02  4.553e-02  -0.535
## regionNetherlands                                 1.759e-02  1.385e-02   1.270
## regionNew Zealand                                 6.617e-03  1.379e-02   0.480
## regionNorway                                     -1.344e-02  1.381e-02  -0.973
## regionPhilippines                                 2.718e-02  1.395e-02   1.948
## regionPoland                                      2.695e-02  1.392e-02   1.936
## regionPortugal                                   -3.624e-03  1.380e-02  -0.263
## regionRomania                                     8.269e-03  1.403e-02   0.589
## regionRussian Federation                          1.818e-01  1.502e-02  12.107
## regionSenegal                                    -4.335e-03  1.711e-02  -0.253
## regionSerbia                                      1.288e-02  1.762e-02   0.731
## regionSlovakia                                   -6.163e-04  1.414e-02  -0.044
## regionSlovenia                                   -4.896e-04  1.402e-02  -0.035
## regionSpain                                      -3.135e-02  1.396e-02  -2.245
## regionSudan                                      -1.681e-02  1.988e-02  -0.846
## regionSweden                                     -2.160e-02  1.381e-02  -1.564
## regionSwitzerland                                -4.087e-02  1.386e-02  -2.950
## regionTajikistan                                 -1.464e-03  1.516e-02  -0.097
## regionThailand                                    1.684e-01  1.388e-02  12.127
## regionTogo                                       -1.094e-03  1.667e-02  -0.066
## regionTunisia                                    -4.680e-03  1.711e-02  -0.274
## regionTurkey                                     -3.701e-02  1.399e-02  -2.645
## regionUkraine                                     2.643e-02  1.678e-02   1.575
## regionUnited Kingdom                             -1.159e-01  1.468e-02  -7.895
## regionUnited States of America                   -5.381e-01  3.296e-02 -16.328
## regionUruguay                                     4.303e-03  2.769e-02   0.155
## regionVenezuela                                  -2.538e-02  1.381e-02  -1.837
## regionViet Nam                                    3.836e-02  2.112e-02   1.816
## regionZimbabwe                                   -2.516e-03  1.986e-02  -0.127
##                                                  Pr(>|t|)    
## (Intercept)                                      0.938712    
## nonstaples                                        < 2e-16 ***
## GDP                                               < 2e-16 ***
## regionAlgeria                                    0.395249    
## regionArgentina                                  0.464272    
## regionArmenia                                    0.959193    
## regionAustralia                                  0.053713 .  
## regionAustria                                    0.242601    
## regionAzerbaijan                                 0.887164    
## regionBelarus                                    0.166001    
## regionBelgium                                    0.961987    
## regionBenin                                      0.894628    
## regionBosnia and Herzegovina                     0.952559    
## regionBotswana                                   0.988121    
## regionBrazil                                      < 2e-16 ***
## regionBulgaria                                   0.721646    
## regionCameroon                                   0.579644    
## regionCanada                                     2.29e-08 ***
## regionChina                                      7.08e-13 ***
## regionColombia                                   0.403182    
## regionCongo                                      0.989817    
## regionCosta Rica                                 0.468642    
## regionCote dIvoire                               0.946033    
## regionCroatia                                    0.799647    
## regionCyprus                                     0.930577    
## regionCzech Republic                             0.660949    
## regionDenmark                                    0.816636    
## regionDominican Republic                         0.566947    
## regionEstonia                                    0.854160    
## regionFinland                                    0.681431    
## regionFrance                                     0.000558 ***
## regionGeorgia                                    0.944314    
## regionGermany                                     < 2e-16 ***
## regionGreece                                     0.643980    
## regionHungary                                    0.665032    
## regionIceland                                    0.866042    
## regionIreland                                    0.849480    
## regionIsrael                                     0.242935    
## regionItaly                                      1.09e-10 ***
## regionJamaica                                    0.979650    
## regionJapan                                       < 2e-16 ***
## regionKazakhstan                                 0.858313    
## regionKorea, Republic of                         0.007767 ** 
## regionKyrgyzstan                                 0.986913    
## regionLatvia                                     0.817829    
## regionLithuania                                  0.790571    
## regionLuxembourg                                 0.845797    
## regionMacedonia, the former Yugoslav Republic of 0.963212    
## regionMalta                                      0.993341    
## regionMexico                                     0.000470 ***
## regionMoldova, Republic of                       0.789221    
## regionMontenegro                                 0.982711    
## regionMorocco                                    0.339190    
## regionMyanmar                                    0.592731    
## regionNetherlands                                0.204300    
## regionNew Zealand                                0.631455    
## regionNorway                                     0.330750    
## regionPhilippines                                0.051553 .  
## regionPoland                                     0.053046 .  
## regionPortugal                                   0.792807    
## regionRomania                                    0.555789    
## regionRussian Federation                          < 2e-16 ***
## regionSenegal                                    0.799993    
## regionSerbia                                     0.464875    
## regionSlovakia                                   0.965246    
## regionSlovenia                                   0.972137    
## regionSpain                                      0.024897 *  
## regionSudan                                      0.397877    
## regionSweden                                     0.118084    
## regionSwitzerland                                0.003231 ** 
## regionTajikistan                                 0.923077    
## regionThailand                                    < 2e-16 ***
## regionTogo                                       0.947690    
## regionTunisia                                    0.784506    
## regionTurkey                                     0.008251 ** 
## regionUkraine                                    0.115475    
## regionUnited Kingdom                             5.76e-15 ***
## regionUnited States of America                    < 2e-16 ***
## regionUruguay                                    0.876522    
## regionVenezuela                                  0.066357 .  
## regionViet Nam                                   0.069569 .  
## regionZimbabwe                                   0.899169    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04421 on 1420 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.9657, Adjusted R-squared:  0.9637 
## F-statistic: 492.9 on 81 and 1420 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples + staples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65301 -0.01313 -0.00181  0.00369  0.56450 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## nonstaples  3.18511    0.04297   74.13   <2e-16 ***
## staples    -0.48420    0.03101  -15.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0756 on 1559 degrees of freedom
## Multiple R-squared:  0.9063, Adjusted R-squared:  0.9062 
## F-statistic:  7541 on 2 and 1559 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + staples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65054 -0.01555 -0.00434  0.00116  0.56414 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.002627   0.002090   1.257    0.209    
## nonstaples   3.167009   0.045310  69.897   <2e-16 ***
## staples     -0.478379   0.031351 -15.259   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07559 on 1558 degrees of freedom
## Multiple R-squared:  0.8903, Adjusted R-squared:  0.8902 
## F-statistic:  6324 on 2 and 1558 DF,  p-value: < 2.2e-16

Join data and calculate relationships on a GCAM region level

Using the same criteria to select data as established earlier.

# aggregate the food and energy data, dropping NA values
comb_data_all_cats_GCAM_reg <- comb_data_all_cats %>%
  group_by(GCAM_region_ID, year) %>%
  summarize(EJ = sum(EJ),
            PKcal_w_na = sum(PKcal, na.rm = FALSE),
            PKcal = sum(PKcal, na.rm = TRUE),
            Food_Kt = sum(Food_Kt, na.rm = TRUE),
            Food_Kt_w_calorie = sum(Food_Kt_w_calorie, na.rm = TRUE),
            GDP_total_thous2015USD_FAO = sum(GDP_pcap_thous2015USD_FAO * pop, na.rm = TRUE),
            pop = sum(pop, na.rm = TRUE),
            Feed_Kt = sum(Feed_Kt, na.rm = TRUE),
            nonstaples_animal = sum(nonstaples_animal, na.rm = TRUE),
            nonstaples_other = sum(nonstaples_other, na.rm = TRUE),
            staples = sum(staples, na.rm = TRUE)) %>%
  mutate(GDP_mil2015USD_FAO = GDP_total_thous2015USD_FAO / 1000,
         GDP_pcap_thous2015USD_FAO = GDP_total_thous2015USD_FAO / pop,
         nonstaples = nonstaples_animal + nonstaples_other,
         staples_frac = staples / PKcal,
         nonstaples_frac = nonstaples / PKcal,
         EJ_per_PKcal = EJ / PKcal) %>%
  ungroup() %>%
  left_join(GCAM_region_names)

comb_data_sel_cats_GCAM_reg <- comb_data_all_cats_GCAM_reg %>%
  group_by(region) %>%
  filter(!all(EJ == 0) & !all(is.na(PKcal) | PKcal == 0)) %>%
  ungroup()

comb_data_sel_cats_GCAM_reg_final <- comb_data_sel_cats_GCAM_reg %>%
  right_join(GCAM_reg_years_incl) %>%
  filter(!is.na(PKcal),
         PKcal > 0)

# filter to the data to infill
comb_data_all_cats_GCAM_reg_to_infill <- comb_data_all_cats_GCAM_reg %>%
  anti_join(GCAM_reg_years_incl)

# calculate relationships
# just going to infill from the minimum year onwards for now
hist_and_infilled_test_methods_region <- run_models_calc_infill(comb_data_sel_cats_GCAM_reg_final %>%
                                                                  rename(GDP = GDP_total_thous2015USD_FAO), 
                                                                comb_data_all_cats_GCAM_reg_to_infill %>% filter(year >= min_year) %>%
                                                                  rename(GDP = GDP_total_thous2015USD_FAO)) %>%
  rename(GDP_total_thous2015USD_FAO = GDP)
## 
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.85107 -0.01511  0.01045  0.11280  0.77794 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)    
## PKcal  1.09445    0.02872   38.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2537 on 435 degrees of freedom
## Multiple R-squared:  0.7695, Adjusted R-squared:  0.769 
## F-statistic:  1452 on 1 and 435 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74835 -0.10406 -0.08288  0.03422  0.73279 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.10176    0.01393   7.305 1.34e-12 ***
## PKcal        0.95816    0.03293  29.097  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2397 on 434 degrees of freedom
## Multiple R-squared:  0.6611, Adjusted R-squared:  0.6603 
## F-statistic: 846.6 on 1 and 434 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64795 -0.01905  0.00024  0.02269  0.35150 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## PKcal                                  2.312624   0.128753  17.962  < 2e-16 ***
## regionArgentina                        0.001514   0.112003   0.014 0.989224    
## regionAustralia_NZ                     0.065496   0.022334   2.933 0.003547 ** 
## regionBrazil                           0.183721   0.035537   5.170 3.65e-07 ***
## regionCanada                          -0.059897   0.034347  -1.744 0.081919 .  
## regionCentral Asia                    -0.175384   0.080020  -2.192 0.028952 *  
## regionChina                           -2.196125   0.190183 -11.547  < 2e-16 ***
## regionColombia                        -0.047468   0.022654  -2.095 0.036749 *  
## regionEU-12                           -0.109201   0.028496  -3.832 0.000147 ***
## regionEU-15                           -0.286060   0.076445  -3.742 0.000208 ***
## regionEurope_Eastern                  -0.063382   0.033541  -1.890 0.059494 .  
## regionEurope_Non_EU                   -0.215491   0.027231  -7.913 2.30e-14 ***
## regionEuropean Free Trade Association -0.009011   0.022052  -0.409 0.683038    
## regionJapan                           -0.100616   0.028342  -3.550 0.000429 ***
## regionMexico                          -0.205020   0.027586  -7.432 6.16e-13 ***
## regionRussia                          -0.048545   0.032250  -1.505 0.133013    
## regionSouth America_Northern          -0.056373   0.022723  -2.481 0.013502 *  
## regionSouth Korea                     -0.072224   0.023301  -3.100 0.002070 ** 
## regionSoutheast Asia                  -0.534924   0.053101 -10.074  < 2e-16 ***
## regionTaiwan                          -0.034926   0.022195  -1.574 0.116350    
## regionUSA                             -0.071141   0.063386  -1.122 0.262365    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared:  0.9573, Adjusted R-squared:  0.9551 
## F-statistic:   443 on 21 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64795 -0.01905  0.00024  0.02269  0.35150 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.001514   0.112003   0.014   0.9892    
## PKcal                                  2.312624   0.128753  17.962  < 2e-16 ***
## regionAustralia_NZ                     0.063982   0.113960   0.561   0.5748    
## regionBrazil                           0.182207   0.115912   1.572   0.1167    
## regionCanada                          -0.061410   0.116777  -0.526   0.5993    
## regionCentral Asia                    -0.176897   0.137052  -1.291   0.1975    
## regionChina                           -2.197639   0.214945 -10.224  < 2e-16 ***
## regionColombia                        -0.048981   0.113939  -0.430   0.6675    
## regionEU-12                           -0.110715   0.114519  -0.967   0.3342    
## regionEU-15                           -0.287574   0.131964  -2.179   0.0299 *  
## regionEurope_Eastern                  -0.064896   0.116397  -0.558   0.5775    
## regionEurope_Non_EU                   -0.217005   0.114330  -1.898   0.0584 .  
## regionEuropean Free Trade Association -0.010524   0.114016  -0.092   0.9265    
## regionJapan                           -0.102129   0.114495  -0.892   0.3729    
## regionMexico                          -0.206533   0.114381  -1.806   0.0717 .  
## regionRussia                          -0.050059   0.115246  -0.434   0.6642    
## regionSouth America_Northern          -0.057887   0.114049  -0.508   0.6120    
## regionSouth Korea                     -0.073738   0.113942  -0.647   0.5179    
## regionSoutheast Asia                  -0.536438   0.121366  -4.420 1.26e-05 ***
## regionTaiwan                          -0.036440   0.113980  -0.320   0.7494    
## regionUSA                             -0.072654   0.125584  -0.579   0.5632    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared:  0.9295, Adjusted R-squared:  0.9261 
## F-statistic: 273.5 on 20 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55125 -0.07106 -0.03461  0.04629  0.72168 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.124e-02  9.489e-03   4.347 1.73e-05 ***
## PKcal       6.723e-01  2.470e-02  27.223  < 2e-16 ***
## GDP         4.806e-11  2.006e-12  23.955  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1574 on 433 degrees of freedom
## Multiple R-squared:  0.8543, Adjusted R-squared:  0.8536 
## F-statistic:  1269 on 2 and 433 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ PKcal + GDP + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42314 -0.02126 -0.00011  0.02144  0.26629 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            1.226e-02  8.828e-02   0.139 0.889650
## PKcal                                  1.182e+00  1.238e-01   9.551  < 2e-16
## GDP                                    7.356e-11  4.616e-12  15.938  < 2e-16
## regionAustralia_NZ                     1.619e-02  8.987e-02   0.180 0.857120
## regionBrazil                           3.176e-01  9.175e-02   3.461 0.000594
## regionCanada                          -1.191e-01  9.211e-02  -1.293 0.196614
## regionCentral Asia                    -1.090e-01  1.081e-01  -1.008 0.314051
## regionChina                           -8.965e-01  1.881e-01  -4.767 2.59e-06
## regionColombia                        -2.345e-02  8.982e-02  -0.261 0.794154
## regionEU-12                           -2.705e-02  9.041e-02  -0.299 0.764950
## regionEU-15                           -5.890e-01  1.057e-01  -5.571 4.56e-08
## regionEurope_Eastern                  -7.468e-03  9.181e-02  -0.081 0.935216
## regionEurope_Non_EU                   -1.289e-01  9.028e-02  -1.428 0.154025
## regionEuropean Free Trade Association -6.571e-02  8.993e-02  -0.731 0.465426
## regionJapan                           -2.505e-01  9.072e-02  -2.761 0.006018
## regionMexico                          -1.347e-01  9.027e-02  -1.493 0.136308
## regionRussia                           6.721e-02  9.113e-02   0.737 0.461268
## regionSouth America_Northern          -5.450e-02  8.989e-02  -0.606 0.544632
## regionSouth Korea                     -8.248e-02  8.981e-02  -0.918 0.358962
## regionSoutheast Asia                  -1.992e-01  9.797e-02  -2.033 0.042672
## regionTaiwan                          -1.696e-02  8.985e-02  -0.189 0.850389
## regionUSA                             -6.029e-01  1.044e-01  -5.774 1.52e-08
##                                          
## (Intercept)                              
## PKcal                                 ***
## GDP                                   ***
## regionAustralia_NZ                       
## regionBrazil                          ***
## regionCanada                             
## regionCentral Asia                       
## regionChina                           ***
## regionColombia                           
## regionEU-12                              
## regionEU-15                           ***
## regionEurope_Eastern                     
## regionEurope_Non_EU                      
## regionEuropean Free Trade Association    
## regionJapan                           ** 
## regionMexico                             
## regionRussia                             
## regionSouth America_Northern             
## regionSouth Korea                        
## regionSoutheast Asia                  *  
## regionTaiwan                             
## regionUSA                             ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08812 on 414 degrees of freedom
## Multiple R-squared:  0.9563, Adjusted R-squared:  0.9541 
## F-statistic: 431.4 on 21 and 414 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58454 -0.03084 -0.00833  0.05096  0.61644 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## nonstaples  2.57670    0.03693   69.78   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1513 on 435 degrees of freedom
## Multiple R-squared:  0.918,  Adjusted R-squared:  0.9178 
## F-statistic:  4869 on 1 and 435 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56417 -0.05481 -0.03459  0.03206  0.60096 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.029797   0.009128   3.264  0.00118 ** 
## nonstaples  2.482717   0.046507  53.383  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1497 on 434 degrees of freedom
## Multiple R-squared:  0.8678, Adjusted R-squared:  0.8675 
## F-statistic:  2850 on 1 and 434 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65396 -0.01592 -0.00002  0.01969  0.33582 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## nonstaples                             2.5314864  0.1189007  21.291  < 2e-16
## regionArgentina                        0.0401654  0.1031191   0.390 0.697102
## regionAustralia_NZ                     0.0806606  0.0204120   3.952 9.12e-05
## regionBrazil                           0.3621971  0.0252941  14.319  < 2e-16
## regionCanada                          -0.0320123  0.0313586  -1.021 0.307922
## regionCentral Asia                    -0.0748663  0.0730905  -1.024 0.306292
## regionChina                           -0.1532522  0.0665663  -2.302 0.021815
## regionColombia                        -0.0113681  0.0204473  -0.556 0.578530
## regionEU-12                            0.0029355  0.0225874   0.130 0.896659
## regionEU-15                            0.0247097  0.0513317   0.481 0.630504
## regionEurope_Eastern                   0.0034103  0.0300955   0.113 0.909836
## regionEurope_Non_EU                   -0.0742120  0.0213833  -3.471 0.000574
## regionEuropean Free Trade Association -0.0004722  0.0202714  -0.023 0.981427
## regionJapan                            0.0429533  0.0218885   1.962 0.050387
## regionMexico                          -0.0756466  0.0217520  -3.478 0.000559
## regionRussia                           0.1282448  0.0237003   5.411 1.06e-07
## regionSouth America_Northern          -0.0288557  0.0207183  -1.393 0.164437
## regionSouth Korea                     -0.0041704  0.0205028  -0.203 0.838916
## regionSoutheast Asia                  -0.0038444  0.0263813  -0.146 0.884209
## regionTaiwan                          -0.0160890  0.0203103  -0.792 0.428722
## regionUSA                              0.1510049  0.0445830   3.387 0.000774
##                                          
## nonstaples                            ***
## regionArgentina                          
## regionAustralia_NZ                    ***
## regionBrazil                          ***
## regionCanada                             
## regionCentral Asia                       
## regionChina                           *  
## regionColombia                           
## regionEU-12                              
## regionEU-15                              
## regionEurope_Eastern                     
## regionEurope_Non_EU                   ***
## regionEuropean Free Trade Association    
## regionJapan                           .  
## regionMexico                          ***
## regionRussia                          ***
## regionSouth America_Northern             
## regionSouth Korea                        
## regionSoutheast Asia                     
## regionTaiwan                             
## regionUSA                             ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared:  0.9637, Adjusted R-squared:  0.9619 
## F-statistic:   525 on 21 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65396 -0.01592 -0.00002  0.01969  0.33582 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.040165   0.103119   0.390  0.69710    
## nonstaples                             2.531486   0.118901  21.291  < 2e-16 ***
## regionAustralia_NZ                     0.040495   0.105016   0.386  0.69998    
## regionBrazil                           0.322032   0.105631   3.049  0.00245 ** 
## regionCanada                          -0.072178   0.107632  -0.671  0.50285    
## regionCentral Asia                    -0.115032   0.126224  -0.911  0.36265    
## regionChina                           -0.193418   0.120761  -1.602  0.10999    
## regionColombia                        -0.051534   0.105015  -0.491  0.62388    
## regionEU-12                           -0.037230   0.105201  -0.354  0.72360    
## regionEU-15                           -0.015456   0.113624  -0.136  0.89187    
## regionEurope_Eastern                  -0.036755   0.107260  -0.343  0.73202    
## regionEurope_Non_EU                   -0.114377   0.105061  -1.089  0.27693    
## regionEuropean Free Trade Association -0.040638   0.105036  -0.387  0.69903    
## regionJapan                            0.002788   0.105113   0.027  0.97885    
## regionMexico                          -0.115812   0.105098  -1.102  0.27113    
## regionRussia                           0.088079   0.105415   0.836  0.40389    
## regionSouth America_Northern          -0.069021   0.105103  -0.657  0.51174    
## regionSouth Korea                     -0.044336   0.105013  -0.422  0.67310    
## regionSoutheast Asia                  -0.044010   0.105893  -0.416  0.67791    
## regionTaiwan                          -0.056254   0.105027  -0.536  0.59251    
## regionUSA                              0.110839   0.110993   0.999  0.31856    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared:  0.9401, Adjusted R-squared:  0.9372 
## F-statistic: 325.7 on 20 and 415 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59170 -0.05228 -0.03204  0.02548  0.62888 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.652e-02  8.548e-03   3.102  0.00205 ** 
## nonstaples  2.101e+00  6.483e-02  32.408  < 2e-16 ***
## GDP         1.849e-11  2.329e-12   7.940 1.75e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.14 on 433 degrees of freedom
## Multiple R-squared:  0.8846, Adjusted R-squared:  0.8841 
## F-statistic:  1660 on 2 and 433 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + GDP + region, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45238 -0.01651 -0.00009  0.01962  0.25855 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            3.383e-02  8.588e-02   0.394   0.6938
## nonstaples                             1.407e+00  1.291e-01  10.894  < 2e-16
## GDP                                    6.523e-11  4.804e-12  13.577  < 2e-16
## regionAustralia_NZ                     8.079e-03  8.749e-02   0.092   0.9265
## regionBrazil                           3.841e-01  8.809e-02   4.361 1.64e-05
## regionCanada                          -1.186e-01  8.970e-02  -1.322   0.1869
## regionCentral Asia                    -8.116e-02  1.052e-01  -0.772   0.4406
## regionChina                            1.058e-01  1.030e-01   1.028   0.3048
## regionColombia                        -2.795e-02  8.748e-02  -0.320   0.7495
## regionEU-12                            6.588e-03  8.767e-02   0.075   0.9401
## regionEU-15                           -3.904e-01  9.858e-02  -3.961 8.80e-05
## regionEurope_Eastern                   2.151e-03  8.937e-02   0.024   0.9808
## regionEurope_Non_EU                   -8.001e-02  8.753e-02  -0.914   0.3612
## regionEuropean Free Trade Association -7.704e-02  8.752e-02  -0.880   0.3792
## regionJapan                           -1.731e-01  8.849e-02  -1.957   0.0511
## regionMexico                          -9.046e-02  8.755e-02  -1.033   0.3021
## regionRussia                           1.339e-01  8.786e-02   1.524   0.1283
## regionSouth America_Northern          -6.159e-02  8.753e-02  -0.704   0.4821
## regionSouth Korea                     -6.491e-02  8.747e-02  -0.742   0.4585
## regionSoutheast Asia                   4.433e-02  8.843e-02   0.501   0.6164
## regionTaiwan                          -3.081e-02  8.749e-02  -0.352   0.7249
## regionUSA                             -4.304e-01  1.007e-01  -4.275 2.37e-05
##                                          
## (Intercept)                              
## nonstaples                            ***
## GDP                                   ***
## regionAustralia_NZ                       
## regionBrazil                          ***
## regionCanada                             
## regionCentral Asia                       
## regionChina                              
## regionColombia                           
## regionEU-12                              
## regionEU-15                           ***
## regionEurope_Eastern                     
## regionEurope_Non_EU                      
## regionEuropean Free Trade Association    
## regionJapan                           .  
## regionMexico                             
## regionRussia                             
## regionSouth America_Northern             
## regionSouth Korea                        
## regionSoutheast Asia                     
## regionTaiwan                             
## regionUSA                             ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08582 on 414 degrees of freedom
## Multiple R-squared:  0.9586, Adjusted R-squared:  0.9565 
## F-statistic:   456 on 21 and 414 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ 0 + nonstaples + staples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60034 -0.03229 -0.00884  0.05340  0.58710 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## nonstaples  2.98542    0.06334  47.137  < 2e-16 ***
## staples    -0.39135    0.05075  -7.711  8.6e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1421 on 434 degrees of freedom
## Multiple R-squared:  0.9279, Adjusted R-squared:  0.9275 
## F-statistic:  2791 on 2 and 434 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = EJ ~ nonstaples + staples, data = used_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59751 -0.05314 -0.02936  0.03656  0.57528 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.024456   0.008625   2.836  0.00479 ** 
## nonstaples   2.895952   0.070305  41.191  < 2e-16 ***
## staples     -0.379541   0.050515  -7.513 3.33e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.141 on 433 degrees of freedom
## Multiple R-squared:  0.8831, Adjusted R-squared:  0.8825 
## F-statistic:  1635 on 2 and 433 DF,  p-value: < 2.2e-16

The overall coefficients are relatively similar when calculated from the processed GCAM input data and when calculated from the FAO data more directly, either on a country level or a GCAM region level, (generally <20% difference, except for the non-staples/staples separated model, which has larger differences between the coefficients), which is good validation. The intercepts do vary a bit more. The coefficients are also more different for GDP, but the units (and calibration/values) for GDP are different.